R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
.libPaths()
## [1] "C:/Users/fxyin/AppData/Local/R/win-library/4.3"
## [2] "C:/Program Files/R/R-4.3.3/library"
########import packages and seurat objects of dataset###########
suppressMessages(library(plotly))
suppressMessages(library(Seurat))
suppressMessages(library(dplyr))
suppressMessages(library(Matrix))
suppressMessages(library(gplots))
library(readxl)
library(devtools)
## Loading required package: usethis
library(clustree)
## Loading required package: ggraph
## 
## Attaching package: 'ggraph'
## The following object is masked from 'package:sp':
## 
##     geometry
library(RColorBrewer) 
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following object is masked from 'package:plotly':
## 
##     rename
library(xlsx)

setwd("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking")

##load data

raw_list <- readRDS("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/dataset/monkey_list5.0.rds")

raw_list
## $Sasaki_paper
## An object of class Seurat 
## 28551 features across 34 samples within 1 assay 
## Active assay: RNA (28551 features, 0 variable features)
##  1 layer present: counts
## 
## $Nakamura_paper
## An object of class Seurat 
## 28797 features across 421 samples within 1 assay 
## Active assay: RNA (28797 features, 0 variable features)
##  1 layer present: counts
## 
## $Zhai_2022
## An object of class Seurat 
## 21584 features across 66702 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  7 layers present: counts.1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2022.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2022.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2022.SeuratProject.SeuratProject, counts.Zhai_2022.SeuratProject, counts.Zhai_2022
## 
## $Ma
## An object of class Seurat 
## 29324 features across 1012681 samples within 1 assay 
## Active assay: RNA (29324 features, 0 variable features)
##  21 layers present: counts.1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject.SeuratProject, counts.Ma.SeuratProject.SeuratProject, counts.Ma.SeuratProject, counts.Ma
## 
## $Ma_paper
## An object of class Seurat 
## 30792 features across 1453 samples within 1 assay 
## Active assay: RNA (30792 features, 0 variable features)
##  1 layer present: counts
## 
## $Niu_paper
## An object of class Seurat 
## 28592 features across 600 samples within 1 assay 
## Active assay: RNA (28592 features, 0 variable features)
##  1 layer present: counts
## 
## $Yang
## An object of class Seurat 
## 21584 features across 16857 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  12 layers present: counts.1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject.SeuratProject, counts.Yang.SeuratProject.SeuratProject, counts.Yang.SeuratProject, counts.Yang
## 
## $Zhai_2023
## An object of class Seurat 
## 29324 features across 1646849 samples within 1 assay 
## Active assay: RNA (29324 features, 0 variable features)
##  44 layers present: counts.1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject.SeuratProject, counts.Zhai_2023.SeuratProject, counts.Zhai_2023
## 
## $Gong
## An object of class Seurat 
## 21584 features across 153641 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  24 layers present: counts.1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject.SeuratProject, counts.Gong.SeuratProject.SeuratProject, counts.Gong.SeuratProject, counts.Gong
## 
## $Li
## An object of class Seurat 
## 21584 features across 31874 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  2 layers present: counts.1, counts.2

##data overview

#check cells by stage
age <- do.call(rbind, lapply(1:length(raw_list), function(x) {
  
df <- as.data.frame(table(raw_list[[x]]$stage) ) 
 df$dataset <- names(raw_list[x])
  
 return(df)
}))

#check table
print(age)
##        Var1   Freq        dataset
## 1       E13      3   Sasaki_paper
## 2       E14      1   Sasaki_paper
## 3       E16     11   Sasaki_paper
## 4       E17      2   Sasaki_paper
## 5       E20      8   Sasaki_paper
## 6      E36m      6   Sasaki_paper
## 7      E47m      1   Sasaki_paper
## 8      E55m      2   Sasaki_paper
## 9      C6FF      9 Nakamura_paper
## 10     C6oF     14 Nakamura_paper
## 11     C9oF      8 Nakamura_paper
## 12      E06     43 Nakamura_paper
## 13      E07     35 Nakamura_paper
## 14      E08     95 Nakamura_paper
## 15      E09     20 Nakamura_paper
## 16      E13     46 Nakamura_paper
## 17      E14     62 Nakamura_paper
## 18      E16     36 Nakamura_paper
## 19      E17     53 Nakamura_paper
## 20  CS11-e1  11360      Zhai_2022
## 21  CS11-e2  14288      Zhai_2022
## 22 CS11-ys1   6859      Zhai_2022
## 23   CS8-e1   5892      Zhai_2022
## 24   CS8-e2   8958      Zhai_2022
## 25   CS9-e1  10024      Zhai_2022
## 26   CS9-e2   9321      Zhai_2022
## 27    day11 107647             Ma
## 28    day12 148279             Ma
## 29    Day12  50346             Ma
## 30    day13  48308             Ma
## 31    Day13  88477             Ma
## 32    day14 206006             Ma
## 33    day16 211473             Ma
## 34    day17  54193             Ma
## 35    Day17  97952             Ma
## 36 d.p.f.11    154       Ma_paper
## 37 d.p.f.12    249       Ma_paper
## 38 d.p.f.13    162       Ma_paper
## 39 d.p.f.14    322       Ma_paper
## 40 d.p.f.16    336       Ma_paper
## 41 d.p.f.17    230       Ma_paper
## 42      E09     36      Niu_paper
## 43      E11     57      Niu_paper
## 44      E13     96      Niu_paper
## 45      E15    121      Niu_paper
## 46      E17    140      Niu_paper
## 47      E19     66      Niu_paper
## 48      E20     84      Niu_paper
## 49      D10   2993           Yang
## 50      D12   3157           Yang
## 51      D14  10707           Yang
## 52      E16 329757      Zhai_2023
## 53      E18 137172      Zhai_2023
## 54      E20 248878      Zhai_2023
## 55      E21 184546      Zhai_2023
## 56      E22 155744      Zhai_2023
## 57      E23 294811      Zhai_2023
## 58      E24 102855      Zhai_2023
## 59      E25 193086      Zhai_2023
## 60      E18  21478           Gong
## 61      E19  16739           Gong
## 62      E20  14934           Gong
## 63      E21  11828           Gong
## 64      E22  11757           Gong
## 65      E23  39142           Gong
## 66      E24  20297           Gong
## 67      E25  17466           Gong
## 68      D17  10543             Li
## 69       D7  21331             Li

##data organize

##remove Ma from the list
raw_list <- raw_list[!names(raw_list) %in% "Ma"]
##remove Zhai_2023 from the list
raw_list <- raw_list[!names(raw_list) %in% "Zhai_2023"]
##remove Zhai_2023 from the list
raw_list <- raw_list[!names(raw_list) %in% "Li"]

raw_list$Zhai_2022 <- JoinLayers(raw_list$Zhai_2022)
raw_list$Yang <- JoinLayers(raw_list$Yang)
raw_list$Gong <- JoinLayers(raw_list$Gong)

#filter cells with thresholds
for (i in seq_along(raw_list)) {
  
  # Access the metadata
  metadata <- raw_list[[i]]@meta.data
  
  # Apply the filter to the metadata and identify cells to keep
  cells_to_keep <- colnames(raw_list[[i]])[metadata$nFeature_RNA > 500 & metadata$nCount_RNA > 1000 & metadata$percent.mt < 15]
  
  # Check if any cells meet the criteria
  if (length(cells_to_keep) > 0) {
    # Subset the Seurat object to keep only the desired cells
    raw_list[[i]] <- subset(raw_list[[i]], cells = cells_to_keep)
  } else {
    message(paste("No cells found for raw_list element:", i))
  }
}
## No cells found for raw_list element: 4
# Verify the result
print(raw_list)
## $Sasaki_paper
## An object of class Seurat 
## 28551 features across 34 samples within 1 assay 
## Active assay: RNA (28551 features, 0 variable features)
##  1 layer present: counts
## 
## $Nakamura_paper
## An object of class Seurat 
## 28797 features across 421 samples within 1 assay 
## Active assay: RNA (28797 features, 0 variable features)
##  1 layer present: counts
## 
## $Zhai_2022
## An object of class Seurat 
## 21584 features across 64575 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  1 layer present: counts
## 
## $Ma_paper
## An object of class Seurat 
## 30792 features across 1453 samples within 1 assay 
## Active assay: RNA (30792 features, 0 variable features)
##  1 layer present: counts
## 
## $Niu_paper
## An object of class Seurat 
## 28592 features across 600 samples within 1 assay 
## Active assay: RNA (28592 features, 0 variable features)
##  1 layer present: counts
## 
## $Yang
## An object of class Seurat 
## 21584 features across 15503 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  1 layer present: counts
## 
## $Gong
## An object of class Seurat 
## 21584 features across 134022 samples within 1 assay 
## Active assay: RNA (21584 features, 0 variable features)
##  1 layer present: counts
# check cell numbers
cell_counts <- data.frame(Dataset = integer(), Cell_Count = integer())

for (i in seq_along(raw_list)) {
  
  # Get the number of cells in the current Seurat object
  cell_count <- ncol(raw_list[[i]])
  
  # Store the result in the data frame
  cell_counts <- rbind(cell_counts, data.frame(Dataset = names(raw_list[i]), Cell_Count = cell_count))
}

# Print the table of cell counts
print(cell_counts)
##          Dataset Cell_Count
## 1   Sasaki_paper         34
## 2 Nakamura_paper        421
## 3      Zhai_2022      64575
## 4       Ma_paper       1453
## 5      Niu_paper        600
## 6           Yang      15503
## 7           Gong     134022

##sasaki dataset

##check colnames
colnames(raw_list$Sasaki_paper)[1:5]
## [1] "E20_MS08T78" "E20_MS08T79" "E20_MS08T81" "E20_MS08T82" "E20_MS08T83"
ncol(raw_list$Sasaki_paper)
## [1] 34
##add annotation
sasaki_info <- read_excel("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/Monkey-Sasaki/mmc2 (1).xlsx")

#match colnames
df <- sasaki_info[which(sasaki_info$SampleID %in% rownames(raw_list$Sasaki_paper@meta.data)),]
df <- df[match(rownames(raw_list$Sasaki_paper@meta.data), df$SampleID),]

raw_list$Sasaki_paper@meta.data$anno <- df$Group

####check plots############
VlnPlot(raw_list$Sasaki_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

VlnPlot(raw_list$Sasaki_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

##remove E36m, E47m and E55m
raw_list$Sasaki_paper@meta.data$stage <- as.character(raw_list$Sasaki_paper@meta.data$stage)
meta_data <- raw_list$Sasaki_paper@meta.data

# Identify the cells to keep by creating a logical vector
cells_to_keep <- rownames(meta_data[!meta_data$stage %in% c("E36m", "E47m", "E55m"), ])

# Subset the Seurat object using the identified cells
raw_list$Sasaki_paper <- subset(raw_list$Sasaki_paper, cells = cells_to_keep)


meta <- raw_list$Sasaki_paper@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells 
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##nakamura dataset

##check colnames
colnames(raw_list$Nakamura_paper)[1:5]
## [1] "C6FF_MS11T85" "C6FF_MS11T86" "C6FF_MS11T87" "C6FF_MS11T88" "C6FF_MS11T89"
ncol(raw_list$Nakamura_paper)
## [1] 421
##add annotation
Nakamura_info <- read_excel("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/Monkey-Nakamura/Nakamura.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
colnames(Nakamura_info) <-Nakamura_info[1,] 
Nakamura_info <- Nakamura_info[-1,]

Nakamura_info <- Nakamura_info[which(Nakamura_info$Species=="Cynomolgus Monkey"),]

#match colnames
anno_table <- Nakamura_info
dataset <- raw_list$Nakamura_paper

length(Nakamura_info$SampleID %in% colnames(Nakamura_info))
## [1] 421
df <- Nakamura_info[match(rownames(raw_list$Nakamura_paper@meta.data), Nakamura_info$SampleID),]
raw_list$Nakamura_paper@meta.data$anno <- df$Group

dataset <- raw_list$Nakamura_paper
dataset <- subset(dataset, subset= anno != "cyESCFF" & anno != "cyESCoF"  )

raw_list$Nakamura_paper <- dataset 


####check plots############
VlnPlot(raw_list$Nakamura_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

raw_list$Nakamura_paper@meta.data$stage <- as.character(raw_list$Nakamura_paper@meta.data$stage) 
raw_list$Nakamura_paper@meta.data$stage <- as.factor(raw_list$Nakamura_paper@meta.data$stage)
levels(raw_list$Nakamura_paper@meta.data$stage)
## [1] "E06" "E07" "E08" "E09" "E13" "E14" "E16" "E17"
meta <- raw_list$Nakamura_paper@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))


##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

#Ma paper

raw_list$Ma_paper$embryo <- raw_list$Ma_paper$orig.ident
raw_list$Ma_paper$orig.ident <- "Ma"

raw_list$Ma_paper[["percent.mt"]] <- PercentageFeatureSet(raw_list$Ma_paper, pattern = "^MT.")

VlnPlot(raw_list$Ma_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),  group.by = "orig.ident", ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

meta <- raw_list$Ma_paper@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##Niu paper #have problem with gene names so fix this first

genes <- as.data.frame(rownames(raw_list$Niu_paper)) 
genes_2 <- genes[grep("ENSMFAG", genes[,1]),,drop=FALSE]
genes.rest <- genes[-grep("ENSMFAG", genes[,1]),,drop=FALSE]

######use both mfascicularis and hsapiens biomart#######

library(biomaRt)

ensembl <- useEnsembl(biomart = "ensembl", mirror = "asia") # Use the Asia mirror
## Ensembl site unresponsive, trying www mirror
listEnsembl()
##         biomart                version
## 1         genes      Ensembl Genes 112
## 2 mouse_strains      Mouse strains 112
## 3          snps  Ensembl Variation 112
## 4    regulation Ensembl Regulation 112
listDatasets(ensembl)
##                            dataset
## 1     abrachyrhynchus_gene_ensembl
## 2         acalliptera_gene_ensembl
## 3       acarolinensis_gene_ensembl
## 4        acchrysaetos_gene_ensembl
## 5        acitrinellus_gene_ensembl
## 6        amelanoleuca_gene_ensembl
## 7          amexicanus_gene_ensembl
## 8          anancymaae_gene_ensembl
## 9          aocellaris_gene_ensembl
## 10           apercula_gene_ensembl
## 11     aplatyrhynchos_gene_ensembl
## 12      apolyacanthus_gene_ensembl
## 13    applatyrhynchos_gene_ensembl
## 14       atestudineus_gene_ensembl
## 15            bbbison_gene_ensembl
## 16         bgrunniens_gene_ensembl
## 17           bihybrid_gene_ensembl
## 18          bmusculus_gene_ensembl
## 19             bmutus_gene_ensembl
## 20         bsplendens_gene_ensembl
## 21            btaurus_gene_ensembl
## 22        cabingdonii_gene_ensembl
## 23              catys_gene_ensembl
## 24           cauratus_gene_ensembl
## 25           cccarpio_gene_ensembl
## 26       cdromedarius_gene_ensembl
## 27           celegans_gene_ensembl
## 28        cgchok1gshd_gene_ensembl
## 29             cgobio_gene_ensembl
## 30          charengus_gene_ensembl
## 31            chircus_gene_ensembl
## 32         choffmanni_gene_ensembl
## 33     chyarkandensis_gene_ensembl
## 34          cimitator_gene_ensembl
## 35      cintestinalis_gene_ensembl
## 36           cjacchus_gene_ensembl
## 37          cjaponica_gene_ensembl
## 38          clanigera_gene_ensembl
## 39            cldingo_gene_ensembl
## 40       clfamiliaris_gene_ensembl
## 41            clumpus_gene_ensembl
## 42             cmilii_gene_ensembl
## 43           cpbellii_gene_ensembl
## 44         cporcellus_gene_ensembl
## 45           cporosus_gene_ensembl
## 46           csabaeus_gene_ensembl
## 47          csavignyi_gene_ensembl
## 48        csemilaevis_gene_ensembl
## 49          csyrichta_gene_ensembl
## 50        cvariegatus_gene_ensembl
## 51           cwagneri_gene_ensembl
## 52        dclupeoides_gene_ensembl
## 53            dlabrax_gene_ensembl
## 54            dleucas_gene_ensembl
## 55      dmelanogaster_gene_ensembl
## 56      dnovemcinctus_gene_ensembl
## 57             dordii_gene_ensembl
## 58             drerio_gene_ensembl
## 59            easinus_gene_ensembl
## 60           eburgeri_gene_ensembl
## 61          ecaballus_gene_ensembl
## 62       ecalabaricus_gene_ensembl
## 63        eelectricus_gene_ensembl
## 64         eeuropaeus_gene_ensembl
## 65            elucius_gene_ensembl
## 66          etelfairi_gene_ensembl
## 67        falbicollis_gene_ensembl
## 68             fcatus_gene_ensembl
## 69      fheteroclitus_gene_ensembl
## 70         gaculeatus_gene_ensembl
## 71          gevgoodei_gene_ensembl
## 72            gfortis_gene_ensembl
## 73            ggallus_gene_ensembl
## 74           ggorilla_gene_ensembl
## 75            gmorhua_gene_ensembl
## 76           hburtoni_gene_ensembl
## 77             hcomes_gene_ensembl
## 78           hgfemale_gene_ensembl
## 79             hhucho_gene_ensembl
## 80           hsapiens_gene_ensembl
## 81         ipunctatus_gene_ensembl
## 82  itridecemlineatus_gene_ensembl
## 83           jjaculus_gene_ensembl
## 84        kmarmoratus_gene_ensembl
## 85          lafricana_gene_ensembl
## 86          lbergylta_gene_ensembl
## 87        lcalcarifer_gene_ensembl
## 88         lchalumnae_gene_ensembl
## 89            lcrocea_gene_ensembl
## 90       llaticaudata_gene_ensembl
## 91       lleishanense_gene_ensembl
## 92          loculatus_gene_ensembl
## 93           marmatus_gene_ensembl
## 94           mauratus_gene_ensembl
## 95            mcaroli_gene_ensembl
## 96         mdomestica_gene_ensembl
## 97      mfascicularis_gene_ensembl
## 98         mgallopavo_gene_ensembl
## 99       mleucophaeus_gene_ensembl
## 100        mlucifugus_gene_ensembl
## 101         mmmarmota_gene_ensembl
## 102        mmonoceros_gene_ensembl
## 103      mmoschiferus_gene_ensembl
## 104          mmulatta_gene_ensembl
## 105          mmurdjan_gene_ensembl
## 106          mmurinus_gene_ensembl
## 107         mmusculus_gene_ensembl
## 108       mnemestrina_gene_ensembl
## 109      mochrogaster_gene_ensembl
## 110           mpahari_gene_ensembl
## 111            mpfuro_gene_ensembl
## 112       mspicilegus_gene_ensembl
## 113          mspretus_gene_ensembl
## 114            mzebra_gene_ensembl
## 115        nbrichardi_gene_ensembl
## 116          neugenii_gene_ensembl
## 117          nfurzeri_gene_ensembl
## 118           ngalili_gene_ensembl
## 119       nleucogenys_gene_ensembl
## 120             nnaja_gene_ensembl
## 121         nscutatus_gene_ensembl
## 122            nvison_gene_ensembl
## 123         oanatinus_gene_ensembl
## 124     oarambouillet_gene_ensembl
## 125            oaries_gene_ensembl
## 126        ocuniculus_gene_ensembl
## 127            odegus_gene_ensembl
## 128        ogarnettii_gene_ensembl
## 129        ojavanicus_gene_ensembl
## 130          okisutch_gene_ensembl
## 131          olatipes_gene_ensembl
## 132       omelastigma_gene_ensembl
## 133           omykiss_gene_ensembl
## 134        oniloticus_gene_ensembl
## 135         oprinceps_gene_ensembl
## 136         osinensis_gene_ensembl
## 137      otshawytscha_gene_ensembl
## 138           pabelii_gene_ensembl
## 139           panubis_gene_ensembl
## 140         pcapensis_gene_ensembl
## 141          pcatodon_gene_ensembl
## 142         pcinereus_gene_ensembl
## 143        pcoquereli_gene_ensembl
## 144          pformosa_gene_ensembl
## 145       pkingsleyae_gene_ensembl
## 146        platipinna_gene_ensembl
## 147              pleo_gene_ensembl
## 148            pmajor_gene_ensembl
## 149          pmarinus_gene_ensembl
## 150         pmbairdii_gene_ensembl
## 151          pmuralis_gene_ensembl
## 152        pnattereri_gene_ensembl
## 153         pnyererei_gene_ensembl
## 154         ppaniscus_gene_ensembl
## 155           ppardus_gene_ensembl
## 156       preticulata_gene_ensembl
## 157            psimus_gene_ensembl
## 158         psinensis_gene_ensembl
## 159            psinus_gene_ensembl
## 160         ptaltaica_gene_ensembl
## 161         ptextilis_gene_ensembl
## 162      ptroglodytes_gene_ensembl
## 163         pvampyrus_gene_ensembl
## 164            rbieti_gene_ensembl
## 165    rferrumequinum_gene_ensembl
## 166       rnorvegicus_gene_ensembl
## 167        rroxellana_gene_ensembl
## 168          saraneus_gene_ensembl
## 169           saurata_gene_ensembl
## 170     sbboliviensis_gene_ensembl
## 171          scanaria_gene_ensembl
## 172       scaustralis_gene_ensembl
## 173       scerevisiae_gene_ensembl
## 174         sdumerili_gene_ensembl
## 175         sformosus_gene_ensembl
## 176       shabroptila_gene_ensembl
## 177         sharrisii_gene_ensembl
## 178        sldorsalis_gene_ensembl
## 179       slucioperca_gene_ensembl
## 180          smaximus_gene_ensembl
## 181         smerianae_gene_ensembl
## 182         spartitus_gene_ensembl
## 183        spunctatus_gene_ensembl
## 184            ssalar_gene_ensembl
## 185           ssbamei_gene_ensembl
## 186       ssberkshire_gene_ensembl
## 187           sscrofa_gene_ensembl
## 188       sshampshire_gene_ensembl
## 189          ssjinhua_gene_ensembl
## 190        sslandrace_gene_ensembl
## 191      sslargewhite_gene_ensembl
## 192         ssmeishan_gene_ensembl
## 193        sspietrain_gene_ensembl
## 194       ssrongchang_gene_ensembl
## 195         sstibetan_gene_ensembl
## 196          ssusmarc_gene_ensembl
## 197       sswuzhishan_gene_ensembl
## 198           strutta_gene_ensembl
## 199         svulgaris_gene_ensembl
## 200        tbelangeri_gene_ensembl
## 201       tctriunguis_gene_ensembl
## 202          tguttata_gene_ensembl
## 203     tnigroviridis_gene_ensembl
## 204         trubripes_gene_ensembl
## 205        ttruncatus_gene_ensembl
## 206       uamericanus_gene_ensembl
## 207        umaritimus_gene_ensembl
## 208          uparryii_gene_ensembl
## 209            vpacos_gene_ensembl
## 210          vursinus_gene_ensembl
## 211           vvulpes_gene_ensembl
## 212       xcouchianus_gene_ensembl
## 213        xmaculatus_gene_ensembl
## 214       xtropicalis_gene_ensembl
##                                                      description
## 1                          Pink-footed goose genes (ASM259213v1)
## 2                               Eastern happy genes (fAstCal1.3)
## 3                                Green anole genes (AnoCar2.0v2)
## 4                                Golden eagle genes (bAquChr1.2)
## 5                                 Midas cichlid genes (Midas_v5)
## 6                                Giant panda genes (ASM200744v2)
## 7                   Mexican tetra genes (Astyanax_mexicanus-2.0)
## 8                             Ma's night monkey genes (Anan_2.0)
## 9                         Clown anemonefish genes (ASM2253959v1)
## 10                              Orange clownfish genes (Nemo_v1)
## 11                                   Mallard genes (ASM874695v1)
## 12                             Spiny chromis genes (ASM210954v1)
## 13                                      Duck genes (CAU_duck1.0)
## 14                             Climbing perch genes (fAnaTes1.3)
## 15                           American bison genes (Bison_UMD1.0)
## 16                           Domestic yak genes (LU_Bosgru_v3.0)
## 17                    Hybrid - Bos Indicus genes (UOA_Brahman_1)
## 18                                Blue whale genes (mBalMus1.v2)
## 19                                  Wild yak genes (BosGru_v2.0)
## 20                      Siamese fighting fish genes (fBetSpl5.2)
## 21                                        Cow genes (ARS-UCD1.3)
## 22            Abingdon island giant tortoise genes (ASM359739v1)
## 23                               Sooty mangabey genes (Caty_1.0)
## 24                                  Goldfish genes (ASM336829v1)
## 25                            Common carp genes (Cypcar_WagV4.0)
## 26                                 Arabian camel genes (CamDro2)
## 27        Caenorhabditis elegans (Nematode, N2) genes (WBcel235)
## 28                  Chinese hamster CHOK1GS genes (CHOK1GS_HDv1)
## 29                        Channel bull blenny genes (fCotGob3.1)
## 30                         Atlantic herring  genes (Ch_v2.0.2v2)
## 31                                             Goat genes (ARS1)
## 32                                         Sloth genes (choHof1)
## 33                                   Yarkand deer genes (CEY_v1)
## 34    Panamanian white-faced capuchin genes (Cebus_imitator-1.0)
## 35                                     C.intestinalis genes (KH)
## 36              White-tufted-ear marmoset genes (mCalJac1.pat.X)
## 37                  Japanese quail genes (Coturnix_japonica_2.0)
## 38                      Long-tailed chinchilla genes (ChiLan1.0)
## 39                                     Dingo genes (ASM325472v1)
## 40                                      Dog genes (ROS_Cfam_1.0)
## 41                                 Lumpfish genes (fCycLum1.pri)
## 42              Elephant shark genes (Callorhinchus_milii-6.1.3)
## 43           Painted turtle genes (Chrysemys_picta_bellii-3.0.3)
## 44                                  Guinea Pig genes (Cavpor3.0)
## 45           Australian saltwater crocodile genes (CroPor_comp1)
## 46                                  Vervet-AGM genes (ChlSab1.1)
## 47                                   C.savignyi genes (CSAV 2.0)
## 48                                  Tongue sole genes (Cse_v1.0)
## 49                        Tarsier genes (Tarsius_syrichta-2.0.1)
## 50                    Sheepshead minnow genes (C_variegatus-1.0)
## 51                    Chacoan peccary genes (CatWag_v2_BIUU_UCD)
## 52                           Denticle herring genes (fDenClu1.2)
## 53                          European seabass genes (dlabrax2021)
## 54                              Beluga whale genes (ASM228892v3)
## 55          Drosophila melanogaster (Fruit fly) genes (BDGP6.46)
## 56                                   Armadillo genes (Dasnov3.0)
## 57                                 Kangaroo rat genes (Dord_2.0)
## 58                                      Zebrafish genes (GRCz11)
## 59                                   Donkey genes (ASM1607732v2)
## 60                                  Hagfish genes (Eburgeri_3.2)
## 61                                       Horse genes (EquCab3.0)
## 62                                   Reedfish genes (fErpCal1.1)
## 63                             Electric eel genes (fEleEle1.pri)
## 64                                      Hedgehog genes (eriEur1)
## 65                            Northern pike genes (fEsoLuc1.pri)
## 66                         Lesser hedgehog tenrec genes (TENREC)
## 67                         Collared flycatcher genes (FicAlb1.5)
## 68                                   Cat genes (Felis_catus_9.0)
## 69                 Mummichog genes (Fundulus_heteroclitus-3.0.2)
## 70                   Stickleback genes (GAculeatus_UGA_version5)
## 71              Goodes thornscrub tortoise genes (rGopEvg1_v1.p)
## 72                        Medium ground-finch genes (GeoFor_1.0)
## 73                   Chicken genes (bGalGal1.mat.broiler.GRCg7b)
## 74                                       Gorilla genes (gorGor4)
## 75                                Atlantic cod genes (gadMor3.0)
## 76                       Burton's mouthbrooder genes (AstBur1.0)
## 77                    Tiger tail seahorse genes (H_comes_QL1_v1)
## 78         Naked mole-rat female genes (Naked_mole-rat_maternal)
## 79                                    Huchen genes (ASM331708v1)
## 80                                      Human genes (GRCh38.p14)
## 81                           Channel catfish genes (ASM400665v3)
## 82                                    Squirrel genes (SpeTri2.0)
## 83                      Lesser Egyptian jerboa genes (JacJac1.0)
## 84                          Mangrove rivulus genes (ASM164957v1)
## 85                                    Elephant genes (Loxafr3.0)
## 86                              Ballan wrasse genes (BallGen_V1)
## 87                  Barramundi perch genes (ASB_HGAPassembly_v1)
## 88                                    Coelacanth genes (LatCha1)
## 89                     Large yellow croaker genes (L_crocea_2.0)
## 90                      Blue-ringed sea krait genes (latLat_1.0)
## 91                        Leishan spiny toad genes (ASM966780v1)
## 92                                   Spotted gar genes (LepOcu1)
## 93                                Zig-zag eel genes (fMasArm1.2)
## 94                              Golden Hamster genes (MesAur1.0)
## 95                          Ryukyu mouse genes (CAROLI_EIJ_v1.1)
## 96                                      Opossum genes (ASM229v1)
## 97           Crab-eating macaque genes (Macaca_fascicularis_6.0)
## 98                                     Turkey genes (Turkey_5.1)
## 99                                     Drill genes (Mleu.le_1.0)
## 100                                   Microbat genes (Myoluc2.0)
## 101                              Alpine marmot genes (marMar2.1)
## 102                                Narwhal genes (NGI_Narwhal_1)
## 103                Siberian musk deer genes (MosMos_v2_BIUU_UCD)
## 104                                      Macaque genes (Mmul_10)
## 105                      Pinecone soldierfish genes (fMyrMur1.1)
## 106                                 Mouse Lemur genes (Mmur_3.0)
## 107                                         Mouse genes (GRCm39)
## 108                          Pig-tailed macaque genes (Mnem_1.0)
## 109                               Prairie vole genes (MicOch1.0)
## 110                          Shrew mouse genes (PAHARI_EIJ_v1.1)
## 111                                  Ferret genes (MusPutFur1.0)
## 112                                 Steppe mouse genes (MUSP714)
## 113                          Algerian mouse genes (SPRET_EiJ_v1)
## 114                            Zebra mbuna genes (M_zebra_UMD2a)
## 115                           Lyretail cichlid genes (NeoBri1.0)
## 116                                     Wallaby genes (Meug_1.0)
## 117                     Turquoise killifish genes (Nfu_20140520)
## 118 Upper Galilee mountains blind mole rat genes (S.galili_v1.0)
## 119                                      Gibbon genes (Nleu_3.0)
## 120                                 Indian cobra genes (Nana_v5)
## 121                     Mainland tiger snake genes (TS10Xv2-PRI)
## 122                              American mink genes (NNQGG.v01)
## 123                               Platypus genes (mOrnAna1.p.v1)
## 124                               Sheep genes (ARS-UI_Ramb_v2.0)
## 125                               Sheep (texel) genes (Oar_v3.1)
## 126                                     Rabbit genes (OryCun2.0)
## 127                                       Degu genes (OctDeg1.0)
## 128                                     Bushbaby genes (OtoGar3)
## 129                           Javanese ricefish genes (OJAV_1.1)
## 130                                  Coho salmon genes (Okis_V2)
## 131                     Japanese medaka HdrR genes (ASM223467v1)
## 132                           Indian medaka genes (Om_v0.7.RACA)
## 133                         Rainbow trout genes (USDA_OmykA_1.1)
## 134                    Nile tilapia genes (O_niloticus_UMD_NMBU)
## 135                                   Pika genes (OchPri2.0-Ens)
## 136                           Chinese medaka genes (ASM858656v1)
## 137                             Chinook salmon genes (Otsh_v2.0)
## 138                       Sumatran orangutan genes (Susie_PABv2)
## 139                              Olive baboon genes (Panubis1.0)
## 140                                        Hyrax genes (proCap1)
## 141                              Sperm whale genes (ASM283717v2)
## 142                               Koala genes (phaCin_unsw_v4.1)
## 143                           Coquerel's sifaka genes (Pcoq_1.0)
## 144                  Amazon molly genes (Poecilia_formosa-5.1.2)
## 145                  Paramormyrops kingsleyae genes (PKINGS_0.1)
## 146                        Sailfin molly genes (P_latipinna-1.0)
## 147                                       Lion genes (PanLeo1.0)
## 148                             Great Tit genes (Parus_major1.1)
## 149                                 Lamprey genes (Pmarinus_7.0)
## 150             Northern American deer mouse genes (HU_Pman_2.1)
## 151                        Common wall lizard genes (PodMur_1.0)
## 152                     Red-bellied piranha genes (fPygNat1.pri)
## 153                      Makobe Island cichlid genes (PunNye1.0)
## 154                                     Bonobo genes (panpan1.1)
## 155                                    Leopard genes (PanPar1.0)
## 156                            Guppy genes (Guppy_female_1.0_MT)
## 157                      Greater bamboo lemur genes (Prosim_1.0)
## 158                  Chinese softshell turtle genes (PelSin_1.0)
## 159                                 Vaquita genes (mPhoSin1.pri)
## 160                                      Tiger genes (PanTig1.0)
## 161                     Eastern brown snake genes (EBS10Xv2-PRI)
## 162                               Chimpanzee genes (Pan_tro_3.0)
## 163                                      Megabat genes (pteVam1)
## 164                  Black snub-nosed monkey genes (ASM169854v1)
## 165                  Greater horseshoe bat genes (mRhiFer1_v1.p)
## 166                                        Rat genes (mRatBN7.2)
## 167                     Golden snub-nosed monkey genes (Rrox_v1)
## 168                                        Shrew genes (sorAra1)
## 169                         Gilthead seabream genes (fSpaAur1.1)
## 170                   Bolivian squirrel monkey genes (SaiBol1.0)
## 171                                   Common canary genes (SCA1)
## 172                           African ostrich genes (ASM69896v1)
## 173                     Saccharomyces cerevisiae genes (R64-1-1)
## 174                            Greater amberjack genes (Sdu_1.0)
## 175                          Asian bonytongue genes (fSclFor1.1)
## 176                                 Kakapo genes (bStrHab1_v1.p)
## 177                          Tasmanian devil genes (mSarHar1.11)
## 178                          Yellowtail amberjack genes (Sedor1)
## 179                                Pike-perch genes (SLUC_FBN_1)
## 180                                  Turbot genes (ASM1334776v1)
## 181             Argentine black and white tegu genes (HLtupMer3)
## 182          Bicolor damselfish genes (Stegastes_partitus-1.0.2)
## 183                                  Tuatara genes (ASM311381v1)
## 184                            Atlantic salmon genes (Ssal_v3.1)
## 185                             Pig - Bamei genes (Bamei_pig_v1)
## 186                     Pig - Berkshire genes (Berkshire_pig_v1)
## 187                                      Pig genes (Sscrofa11.1)
## 188                     Pig - Hampshire genes (Hampshire_pig_v1)
## 189                           Pig - Jinhua genes (Jinhua_pig_v1)
## 190                       Pig - Landrace genes (Landrace_pig_v1)
## 191                      Pig - Largewhite genes (Large_White_v1)
## 192                         Pig - Meishan genes (Meishan_pig_v1)
## 193                       Pig - Pietrain genes (Pietrain_pig_v1)
## 194                     Pig - Rongchang genes (Rongchang_pig_v1)
## 195                         Pig - Tibetan genes (Tibetan_Pig_v2)
## 196                                Pig USMARC genes (USMARCv1.0)
## 197                         Pig - Wuzhishan genes (minipig_v1.0)
## 198                               Brown trout genes (fSalTru1.1)
## 199                     Eurasian red squirrel genes (mSciVul1.1)
## 200                                   Tree Shrew genes (tupBel1)
## 201              Three-toed box turtle genes (T_m_triunguis-2.0)
## 202                            Zebra finch genes (bTaeGut1_v1.p)
## 203                              Tetraodon genes (TETRAODON 8.0)
## 204                                      Fugu genes (fTakRub1.2)
## 205                                      Dolphin genes (turTru1)
## 206                      American black bear genes (ASM334442v1)
## 207                                Polar bear genes (UrsMar_1.0)
## 208                   Arctic ground squirrel genes (ASM342692v1)
## 209                                       Alpaca genes (vicPac1)
## 210      Common wombat genes (bare-nosed_wombat_genome_assembly)
## 211                                    Red fox genes (VulVul2.2)
## 212     Monterrey platyfish genes (Xiphophorus_couchianus-4.0.1)
## 213                       Platyfish genes (X_maculatus-5.0-male)
## 214                   Tropical clawed frog genes (UCB_Xtro_10.0)
##                               version
## 1                         ASM259213v1
## 2                          fAstCal1.3
## 3                         AnoCar2.0v2
## 4                          bAquChr1.2
## 5                            Midas_v5
## 6                         ASM200744v2
## 7              Astyanax_mexicanus-2.0
## 8                            Anan_2.0
## 9                        ASM2253959v1
## 10                            Nemo_v1
## 11                        ASM874695v1
## 12                        ASM210954v1
## 13                        CAU_duck1.0
## 14                         fAnaTes1.3
## 15                       Bison_UMD1.0
## 16                     LU_Bosgru_v3.0
## 17                      UOA_Brahman_1
## 18                        mBalMus1.v2
## 19                        BosGru_v2.0
## 20                         fBetSpl5.2
## 21                         ARS-UCD1.3
## 22                        ASM359739v1
## 23                           Caty_1.0
## 24                        ASM336829v1
## 25                     Cypcar_WagV4.0
## 26                            CamDro2
## 27                           WBcel235
## 28                       CHOK1GS_HDv1
## 29                         fCotGob3.1
## 30                        Ch_v2.0.2v2
## 31                               ARS1
## 32                            choHof1
## 33                             CEY_v1
## 34                 Cebus_imitator-1.0
## 35                                 KH
## 36                     mCalJac1.pat.X
## 37              Coturnix_japonica_2.0
## 38                          ChiLan1.0
## 39                        ASM325472v1
## 40                       ROS_Cfam_1.0
## 41                       fCycLum1.pri
## 42          Callorhinchus_milii-6.1.3
## 43       Chrysemys_picta_bellii-3.0.3
## 44                          Cavpor3.0
## 45                       CroPor_comp1
## 46                          ChlSab1.1
## 47                           CSAV 2.0
## 48                           Cse_v1.0
## 49             Tarsius_syrichta-2.0.1
## 50                   C_variegatus-1.0
## 51                 CatWag_v2_BIUU_UCD
## 52                         fDenClu1.2
## 53                        dlabrax2021
## 54                        ASM228892v3
## 55                           BDGP6.46
## 56                          Dasnov3.0
## 57                           Dord_2.0
## 58                             GRCz11
## 59                       ASM1607732v2
## 60                       Eburgeri_3.2
## 61                          EquCab3.0
## 62                         fErpCal1.1
## 63                       fEleEle1.pri
## 64                            eriEur1
## 65                       fEsoLuc1.pri
## 66                             TENREC
## 67                          FicAlb1.5
## 68                    Felis_catus_9.0
## 69        Fundulus_heteroclitus-3.0.2
## 70            GAculeatus_UGA_version5
## 71                      rGopEvg1_v1.p
## 72                         GeoFor_1.0
## 73        bGalGal1.mat.broiler.GRCg7b
## 74                            gorGor4
## 75                          gadMor3.0
## 76                          AstBur1.0
## 77                     H_comes_QL1_v1
## 78            Naked_mole-rat_maternal
## 79                        ASM331708v1
## 80                         GRCh38.p14
## 81                        ASM400665v3
## 82                          SpeTri2.0
## 83                          JacJac1.0
## 84                        ASM164957v1
## 85                          Loxafr3.0
## 86                         BallGen_V1
## 87                ASB_HGAPassembly_v1
## 88                            LatCha1
## 89                       L_crocea_2.0
## 90                         latLat_1.0
## 91                        ASM966780v1
## 92                            LepOcu1
## 93                         fMasArm1.2
## 94                          MesAur1.0
## 95                    CAROLI_EIJ_v1.1
## 96                           ASM229v1
## 97            Macaca_fascicularis_6.0
## 98                         Turkey_5.1
## 99                        Mleu.le_1.0
## 100                         Myoluc2.0
## 101                         marMar2.1
## 102                     NGI_Narwhal_1
## 103                MosMos_v2_BIUU_UCD
## 104                           Mmul_10
## 105                        fMyrMur1.1
## 106                          Mmur_3.0
## 107                            GRCm39
## 108                          Mnem_1.0
## 109                         MicOch1.0
## 110                   PAHARI_EIJ_v1.1
## 111                      MusPutFur1.0
## 112                           MUSP714
## 113                      SPRET_EiJ_v1
## 114                     M_zebra_UMD2a
## 115                         NeoBri1.0
## 116                          Meug_1.0
## 117                      Nfu_20140520
## 118                     S.galili_v1.0
## 119                          Nleu_3.0
## 120                           Nana_v5
## 121                       TS10Xv2-PRI
## 122                         NNQGG.v01
## 123                     mOrnAna1.p.v1
## 124                  ARS-UI_Ramb_v2.0
## 125                          Oar_v3.1
## 126                         OryCun2.0
## 127                         OctDeg1.0
## 128                           OtoGar3
## 129                          OJAV_1.1
## 130                           Okis_V2
## 131                       ASM223467v1
## 132                      Om_v0.7.RACA
## 133                    USDA_OmykA_1.1
## 134              O_niloticus_UMD_NMBU
## 135                     OchPri2.0-Ens
## 136                       ASM858656v1
## 137                         Otsh_v2.0
## 138                       Susie_PABv2
## 139                        Panubis1.0
## 140                           proCap1
## 141                       ASM283717v2
## 142                  phaCin_unsw_v4.1
## 143                          Pcoq_1.0
## 144            Poecilia_formosa-5.1.2
## 145                        PKINGS_0.1
## 146                   P_latipinna-1.0
## 147                         PanLeo1.0
## 148                    Parus_major1.1
## 149                      Pmarinus_7.0
## 150                       HU_Pman_2.1
## 151                        PodMur_1.0
## 152                      fPygNat1.pri
## 153                         PunNye1.0
## 154                         panpan1.1
## 155                         PanPar1.0
## 156               Guppy_female_1.0_MT
## 157                        Prosim_1.0
## 158                        PelSin_1.0
## 159                      mPhoSin1.pri
## 160                         PanTig1.0
## 161                      EBS10Xv2-PRI
## 162                       Pan_tro_3.0
## 163                           pteVam1
## 164                       ASM169854v1
## 165                     mRhiFer1_v1.p
## 166                         mRatBN7.2
## 167                           Rrox_v1
## 168                           sorAra1
## 169                        fSpaAur1.1
## 170                         SaiBol1.0
## 171                              SCA1
## 172                        ASM69896v1
## 173                           R64-1-1
## 174                           Sdu_1.0
## 175                        fSclFor1.1
## 176                     bStrHab1_v1.p
## 177                       mSarHar1.11
## 178                            Sedor1
## 179                        SLUC_FBN_1
## 180                      ASM1334776v1
## 181                         HLtupMer3
## 182          Stegastes_partitus-1.0.2
## 183                       ASM311381v1
## 184                         Ssal_v3.1
## 185                      Bamei_pig_v1
## 186                  Berkshire_pig_v1
## 187                       Sscrofa11.1
## 188                  Hampshire_pig_v1
## 189                     Jinhua_pig_v1
## 190                   Landrace_pig_v1
## 191                    Large_White_v1
## 192                    Meishan_pig_v1
## 193                   Pietrain_pig_v1
## 194                  Rongchang_pig_v1
## 195                    Tibetan_Pig_v2
## 196                        USMARCv1.0
## 197                      minipig_v1.0
## 198                        fSalTru1.1
## 199                        mSciVul1.1
## 200                           tupBel1
## 201                 T_m_triunguis-2.0
## 202                     bTaeGut1_v1.p
## 203                     TETRAODON 8.0
## 204                        fTakRub1.2
## 205                           turTru1
## 206                       ASM334442v1
## 207                        UrsMar_1.0
## 208                       ASM342692v1
## 209                           vicPac1
## 210 bare-nosed_wombat_genome_assembly
## 211                         VulVul2.2
## 212      Xiphophorus_couchianus-4.0.1
## 213              X_maculatus-5.0-male
## 214                     UCB_Xtro_10.0
listEnsemblArchives()
##              name     date                                 url version
## 1  Ensembl GRCh37 Feb 2014          https://grch37.ensembl.org  GRCh37
## 2     Ensembl 112 May 2024 https://may2024.archive.ensembl.org     112
## 3     Ensembl 111 Jan 2024 https://jan2024.archive.ensembl.org     111
## 4     Ensembl 110 Jul 2023 https://jul2023.archive.ensembl.org     110
## 5     Ensembl 109 Feb 2023 https://feb2023.archive.ensembl.org     109
## 6     Ensembl 108 Oct 2022 https://oct2022.archive.ensembl.org     108
## 7     Ensembl 107 Jul 2022 https://jul2022.archive.ensembl.org     107
## 8     Ensembl 106 Apr 2022 https://apr2022.archive.ensembl.org     106
## 9     Ensembl 105 Dec 2021 https://dec2021.archive.ensembl.org     105
## 10    Ensembl 104 May 2021 https://may2021.archive.ensembl.org     104
## 11    Ensembl 103 Feb 2021 https://feb2021.archive.ensembl.org     103
## 12    Ensembl 102 Nov 2020 https://nov2020.archive.ensembl.org     102
## 13    Ensembl 101 Aug 2020 https://aug2020.archive.ensembl.org     101
## 14    Ensembl 100 Apr 2020 https://apr2020.archive.ensembl.org     100
## 15     Ensembl 99 Jan 2020 https://jan2020.archive.ensembl.org      99
## 16     Ensembl 98 Sep 2019 https://sep2019.archive.ensembl.org      98
## 17     Ensembl 97 Jul 2019 https://jul2019.archive.ensembl.org      97
## 18     Ensembl 80 May 2015 https://may2015.archive.ensembl.org      80
## 19     Ensembl 77 Oct 2014 https://oct2014.archive.ensembl.org      77
## 20     Ensembl 75 Feb 2014 https://feb2014.archive.ensembl.org      75
## 21     Ensembl 54 May 2009 https://may2009.archive.ensembl.org      54
##    current_release
## 1                 
## 2                *
## 3                 
## 4                 
## 5                 
## 6                 
## 7                 
## 8                 
## 9                 
## 10                
## 11                
## 12                
## 13                
## 14                
## 15                
## 16                
## 17                
## 18                
## 19                
## 20                
## 21
dataset <- useDataset("mfascicularis_gene_ensembl", mart = ensembl)

ensembl_99 <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "https://jan2020.archive.ensembl.org" )

#listDatasets(ensembl_99)

#Use ensembl_99
dataset2 <-  useDataset("mfascicularis_gene_ensembl", mart = ensembl_99)

# List available attributes
attributes <- listAttributes(dataset)
head(attributes)
##                            name                  description         page
## 1               ensembl_gene_id               Gene stable ID feature_page
## 2       ensembl_gene_id_version       Gene stable ID version feature_page
## 3         ensembl_transcript_id         Transcript stable ID feature_page
## 4 ensembl_transcript_id_version Transcript stable ID version feature_page
## 5            ensembl_peptide_id            Protein stable ID feature_page
## 6    ensembl_peptide_id_version    Protein stable ID version feature_page
attributes1 <- c("ensembl_gene_id", "external_gene_name", "wikigene_name")
result1 <- getBM(attributes = attributes1,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Niu_paper)`,
                 mart = dataset)

# Query for ensembl_gene_id, hsapiens_homolog_ensembl_gene, and hsapiens_homolog_associated_gene_name
attributes2 <- c("ensembl_gene_id", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name")
result2 <- getBM(attributes = attributes2,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Niu_paper)`,
                 mart = dataset)

result3 <- getBM(attributes = attributes1,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Niu_paper)`,
                      mart = dataset2)

# Query Macaca fascicularis genes and their human homologs
result4 <- getBM(attributes = attributes2,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Niu_paper)`,
                      mart = dataset2)

# Merge result1 and result2
merged_result <- merge(result1, result2, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result3, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result4, by = "ensembl_gene_id", all = TRUE)


# Define a function to assign gene names based on column priority
assign_gene_name <- function(row) {
  # Priority order of columns
  priority_columns <- c("external_gene_name.x", 
                        "hsapiens_homolog_associated_gene_name.x", 
                        "external_gene_name.y", 
                        "hsapiens_homolog_associated_gene_name.y")
  
  # Iterate over the columns and return the first non-NA value
  for (col in priority_columns) {
    if (!is.na(row[[col]]) && row[[col]] != "") {
      return(row[[col]])
    }
  }
  return(NA) # Return NA if all values are missing or empty
}

# Apply the function to each row in the data frame
merged_result$assigned_gene_name <- apply(merged_result, 1, assign_gene_name)

# Remove duplicates based on 'ensembl_gene_id', keeping the first occurrence
merged_result <- merged_result[!duplicated(merged_result$ensembl_gene_id), ]

merged_result <- merged_result[,c("ensembl_gene_id","assigned_gene_name")]

merged_result <- merged_result[match(genes_2$`rownames(raw_list$Niu_paper)`,merged_result$ensembl_gene_id ),]

genes_2$genename <- merged_result$assigned_gene_name

genes.rest$genename <- genes.rest$`rownames(raw_list$Niu_paper)`

gene.all <- rbind(genes_2,genes.rest )


#check again
any(duplicated(gene.all$genename )) 
## [1] TRUE
# Step 1: Identify all duplicated gene names
duplicated_indices <- duplicated(gene.all$genename) | duplicated(gene.all$genename, fromLast = TRUE)

# Step 2: Replace all occurrences of duplicated gene names with corresponding row names
gene.all$genename[duplicated_indices] <- gene.all$`rownames(raw_list$Niu_paper)`[duplicated_indices]

# Step 3: Check for remaining duplicates
any(duplicated(gene.all$genename))
## [1] FALSE
gene.all <- gene.all[match(rownames(raw_list$Niu_paper),gene.all$`rownames(raw_list$Niu_paper)` ),]

##check again
rownames(raw_list$Niu_paper)[1:5]
## [1] "ENSMFAG00000000001" "ENSMFAG00000000002" "ENSMFAG00000000003"
## [4] "ENSMFAG00000000004" "ENSMFAG00000000005"
rownames(raw_list$Niu_paper) <-gene.all$genename

########################

raw_list$Niu_paper$embryo <- raw_list$Niu_paper$orig.ident
raw_list$Niu_paper$orig.ident <- "Niu"
##pre-analysis
VlnPlot(raw_list$Niu_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Niu <-subset(raw_list$Niu_paper)

Niu <- NormalizeData(Niu, normalization.method = "LogNormalize", scale.factor = 10000)
## Normalizing layer: counts
Niu <- FindVariableFeatures(Niu, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
# Identify the 10 most highly variable genes
head(VariableFeatures(Niu), 10)
##  [1] "HTRA4"    "LAIR2"    "ACTA2"    "PRG2"     "ISM2"     "C15orf48"
##  [7] "MMP2"     "ECM1"     "KHDC3L"   "ESAM"
##PCA
Niu <- ScaleData(Niu)
## Centering and scaling data matrix
Niu <- RunPCA(Niu, features = VariableFeatures(object = Niu))
## PC_ 1 
## Positive:  GCM1, CYP11A1, ENSMFAG00000038879, ENSMFAG00000032513, ENSMFAG00000040054, S100P, IL1RN, SERPINB9, CTSL, GATA3 
##     CDKN1C, ATG4A, ENSMFAG00000041983, PECAM1, C11orf91, WIPI1, PTGIS, ENSMFAG00000035592, TINAGL1, CD55 
##     PHLDA2, ECM1, HMOX1, TFAP2A, CD164, VSIR, INSL4, GATA2, VAMP8, PCSK5 
## Negative:  MDK, PDXP, SNRPN, NME4, APOE, VCAN, 5-8S-rRNA, ENSMFAG00000030038, IFITM1, CCND2 
##     ENSMFAG00000000002, TERF1, CTHRC1, SFRP1, DPPA5, MAP1B, ENSMFAG00000030465, COL1A1, TIMP1, HGF 
##     ENSMFAG00000026088, ENSMFAG00000009591, COL3A1, HAS2, ALLC, TMEM88, RARRES2, POU5F1B, COL1A2, EGR1 
## PC_ 2 
## Positive:  ENSMFAG00000037868, NLRP7, NHERF1, DPPA3, NLRP2, ALPL, CCKBR, ENSMFAG00000045681, KHDC1L, PDZK1 
##     ENSMFAG00000030465, TTR, GDF3, COL9A2, F2, S100A16, IHH, DNAH2, SLC7A8, LRP2 
##     ENSMFAG00000026088, ENSMFAG00000046163, SLC5A6, CDH2, SPINK1, FOLR1, TCN2, RRM1, NODAL, DPP4 
## Negative:  IGFBP3, ANXA1, COL1A1, COL3A1, DCN, COL1A2, TIMP1, PLOD2, TAGLN, ARHGDIB 
##     TGFBI, HAPLN1, SERPING1, COL6A3, HGF, PCOLCE, LUM, CD47, COL5A2, MMP2 
##     COLEC10, TPM2, SERPINE1, NID1, B2M, SULT1C4, COL6A2, CCDC80, PMP22, FBN1 
## PC_ 3 
## Positive:  ENSMFAG00000000002, ENSMFAG00000009591, 5-8S-rRNA, GATA2, DPPA5, POU5F1B, ENSMFAG00000041983, 5-8S-rRNA.2, NMRK2, NFE2L3 
##     TFAP2C, FYB1, TUBA4A, ENSMFAG00000041377, AC087632.2, S100P, STARD10, VGLL1, DLX5, KHDC3L 
##     TERF1, SLTM, TFEB, ENO3, ENSMFAG00000032669, ENSMFAG00000002580, GPR78, AVPI1, PLEKHA6, SLC7A3 
## Negative:  FN1, GSN, TFPI, VTN, SERPINE2, PLTP, MAGED2, VCAN, CDH2, PDZK1 
##     ENSMFAG00000045681, F2, ENSMFAG00000046163, DNAH2, ITM2C, GPC3, RNASE4, TTR, SPARC, ENPP1 
##     TPM1, ECE1, IHH, TCN2, CST3, ENSMFAG00000044722, BMP2, PPFIBP2, COL9A2, DPPA3 
## PC_ 4 
## Positive:  DCN, COL3A1, COL1A2, COL1A1, DAB2, PCOLCE, TGFBI, GADD45G, SPARC, HGF 
##     LUM, COL5A2, COL6A3, PDLIM2, TIMP1, ALLC, COLEC10, ENSMFAG00000025044, HSPB8, SULT1C4 
##     COL6A2, HAPLN1, OLFML3, EGFLAM, CTHRC1, CAVIN3, PDXP, POSTN, A2M, NID2 
## Negative:  ISM2, HTRA4, ESAM, LAIR2, PLAC8, NOTUM, PRG2, MFAP5, KCNJ15, NCAM1 
##     INSL4, CSF2RA, MCAM, MGLL, ENSMFAG00000016007, ADGRG6, PECAM1, MMP12, RASSF6, HS3ST1 
##     ICAM1, TINAGL1, LAMA4, LAIR1, SEPTIN3, C15orf48, JAM2, LVRN, ECM1, LGALS3 
## PC_ 5 
## Positive:  ENSMFAG00000031396, ENSMFAG00000045627, ENSMFAG00000002784, ENSMFAG00000003363, IQCD, ENSMFAG00000034698, SAT1, ENSMFAG00000038834, PPP1R14A, ASB2 
##     ENSMFAG00000046277, ESRRG, ENSMFAG00000024252, ENSMFAG00000002580, PRR9, SLC13A4, HOPX, ENSMFAG00000032669, CGA, ENSMFAG00000046378 
##     HSPB8, WNT10B, MFSD2A, SCG3, CCR7, RASA1, ENSMFAG00000025044, GPR146, KRT23, IL1RN 
## Negative:  SLC30A2, ATP6V0A4, ATP12A, CLPS, ANXA8, ENSMFAG00000033208, S100A14, CD53, SLC12A3, PLD2 
##     CDX1, SMIM1, ATP6V1B1, LRP2, AC034228.3, STS, FFAR4, S100A6, SLC34A2, GM2A 
##     SUSD2, A4GALT, KANK4, MET, SH3BGRL2, EMP2, LAMA1, GPRC5A, RAB11FIP4, PTGES
VizDimLoadings(Niu, dims = 1:2, reduction = "pca")

DimPlot(Niu, reduction = "pca") + NoLegend()

DimHeatmap(Niu, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(Niu)

library(clustree)

Niu <- FindNeighbors(Niu, dims = 1:10, k=10) 
## Computing nearest neighbor graph
## Computing SNN
Niu <- FindClusters(Niu, resolution = seq(0.1,0.6,0.1), verbose = FALSE)

clustree(Niu)

Niu <- FindClusters(Niu, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 600
## Number of edges: 7321
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8990
## Number of communities: 11
## Elapsed time: 0 seconds
Niu <- RunUMAP(Niu, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:04:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:04:04 Read 600 rows and found 10 numeric columns
## 20:04:04 Using Annoy for neighbor search, n_neighbors = 30
## 20:04:04 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:04:04 Writing NN index file to temp file C:\Users\fxyin\AppData\Local\Temp\Rtmpi6jUzC\filee18421f4d1b
## 20:04:04 Searching Annoy index using 1 thread, search_k = 3000
## 20:04:04 Annoy recall = 100%
## 20:04:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:04:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 20:04:06 Commencing optimization for 500 epochs, with 22016 positive edges
## 20:04:09 Optimization finished
# individual clusters
DimPlot(Niu, reduction = "umap")

DimPlot(Niu, reduction = "umap", group.by = "stage")

##################use genemodules#############
epi <- list(c("POU5F1","DPPA5", "KHDC3L", "FOXH1"))
TE <- list(c("CGA", "XAGE3", "PGF", "KRT7", "GATA2" ))
PE <- list(c("TTR", "FOXA2", "APOA1", "DPPA3", "SPINK1"))
EXMC <- list(c("COL3A1", "POSTN", "HAND2", "COL1A1"))

Niu <- AddModuleScore(Niu,
                  features = epi,
                  name="epi_enriched", random.seed = 1)
## Warning: The following features are not present in the object: POU5F1, not
## searching for symbol synonyms
Niu <- AddModuleScore(Niu,
                  features = TE,
                  name="TE_enriched", random.seed = 1)

Niu <- AddModuleScore(Niu,
                  features = PE,
                  name="PE_enriched", random.seed = 1)
## Warning: The following features are not present in the object: APOA1, not
## searching for symbol synonyms
Niu <- AddModuleScore(Niu,
                  features = EXMC,
                  name="EXMC_enriched", random.seed = 1)



# Plot scores
DimPlot(Niu, reduction = "umap")

FeaturePlot(Niu,
            features = "epi_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Niu,
            features = "TE_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Niu,
            features = "PE_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Niu,
            features = "EXMC_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

####rename the clusters#########################
Niu$anno <- c("NA")
Niu$anno[which(Idents(Niu) %in% c('4', '6'))] <- c("PE")
Niu$anno[which(Idents(Niu) %in% c('1','2', '9'))] <- c("Epi")
Niu$anno[which(Idents(Niu) %in% c('0', '7'))] <- c("EXMC")
Niu$anno[which(Idents(Niu) %in% c('3','8', '10'))] <- c("TE")


DimPlot(Niu, reduction = "umap", group.by = "anno")

###add the anno

saveRDS(Niu, file = "Monkey_Niu_reanno.rds")

raw_list$Niu_paper$anno <- Niu$anno

#####plots########

VlnPlot(raw_list$Niu_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

meta <- raw_list$Niu_paper@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##Yang paper ##fix gene names

genes <- as.data.frame(rownames(raw_list$Yang)) 
genes_2 <- genes[grep("ENSMFAG", genes[,1]),,drop=FALSE]
genes.rest <- genes[-grep("ENSMFAG", genes[,1]),,drop=FALSE]

######use both mfascicularis and hsapiens biomart#######

attributes1 <- c("ensembl_gene_id", "external_gene_name", "wikigene_name")
result1 <- getBM(attributes = attributes1,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Yang)`,
                 mart = dataset)

# Query for ensembl_gene_id, hsapiens_homolog_ensembl_gene, and hsapiens_homolog_associated_gene_name
attributes2 <- c("ensembl_gene_id", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name")
result2 <- getBM(attributes = attributes2,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Yang)`,
                 mart = dataset)

result3 <- getBM(attributes = attributes1,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Yang)`,
                      mart = dataset2)

# Query Macaca fascicularis genes and their human homologs
result4 <- getBM(attributes = attributes2,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Yang)`,
                      mart = dataset2)

# Merge result1 and result2
merged_result <- merge(result1, result2, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result3, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result4, by = "ensembl_gene_id", all = TRUE)


# Define a function to assign gene names based on column priority
assign_gene_name <- function(row) {
  # Priority order of columns
  priority_columns <- c("external_gene_name.x", 
                        "hsapiens_homolog_associated_gene_name.x", 
                        "external_gene_name.y", 
                        "hsapiens_homolog_associated_gene_name.y")
  
  # Iterate over the columns and return the first non-NA value
  for (col in priority_columns) {
    if (!is.na(row[[col]]) && row[[col]] != "") {
      return(row[[col]])
    }
  }
  return(NA) # Return NA if all values are missing or empty
}

# Apply the function to each row in the data frame
merged_result$assigned_gene_name <- apply(merged_result, 1, assign_gene_name)

# Remove duplicates based on 'ensembl_gene_id', keeping the first occurrence
merged_result <- merged_result[!duplicated(merged_result$ensembl_gene_id), ]

merged_result <- merged_result[,c("ensembl_gene_id","assigned_gene_name")]

merged_result <- merged_result[match(genes_2$`rownames(raw_list$Yang)`,merged_result$ensembl_gene_id ),]

genes_2$genename <- merged_result$assigned_gene_name

genes.rest$genename <- genes.rest$`rownames(raw_list$Yang)`

gene.all <- rbind(genes_2,genes.rest )

#check again
any(duplicated(gene.all$genename )) 
## [1] TRUE
# Step 1: Identify all duplicated gene names
duplicated_indices <- duplicated(gene.all$genename) | duplicated(gene.all$genename, fromLast = TRUE)

# Step 2: Replace all occurrences of duplicated gene names with corresponding row names
gene.all$genename[duplicated_indices] <- gene.all$`rownames(raw_list$Yang)`[duplicated_indices]

# Step 3: Check for remaining duplicates
any(duplicated(gene.all$genename))
## [1] FALSE
gene.all <- gene.all[match(rownames(raw_list$Yang),gene.all$`rownames(raw_list$Yang)` ),]

##check again
rownames(raw_list$Yang)[1:5]
## [1] "PGBD2"   "ZNF692"  "ZNF672"  "SH3BP5L" "LYPD8"
rownames(raw_list$Yang) <-gene.all$genename

##Yang paper

##no annotation
Yang <- raw_list$Yang
VlnPlot(Yang, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Yang <- NormalizeData(Yang, normalization.method = "LogNormalize", scale.factor = 10000)
## Normalizing layer: counts
Yang <- FindVariableFeatures(Yang, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
# Identify the 10 most highly variable genes
head(VariableFeatures(Yang), 10)
##  [1] "APOA1"              "PRG2"               "ENSMFAG00000002889"
##  [4] "FGF18"              "MMP12"              "TTR"               
##  [7] "HOPX"               "SLC40A1"            "PTPN21"            
## [10] "LGALS16"
##PCA
Yang <- ScaleData(Yang)
## Centering and scaling data matrix
Yang <- RunPCA(Yang, features = VariableFeatures(object = Yang))
## PC_ 1 
## Positive:  ENSMFAG00000040595, ENSMFAG00000036985, SNRPN, PTPRG, TCF4, PRKD1, GPC6, PBX1, TNNT1, ADD3 
##     EXT1, LSAMP, DPPA4, IGF1R, ENSMFAG00000002899, CADM1, BAMBI, DPYSL3, PTPRD, IFITM1 
##     PDPN, POU5F1B, ZNF462, FUOM, VIM, APOE, CXADR, DPPA5, FOXH1, IGF2BP2 
## Negative:  CDKN1C, PGF, CGA, HSD3B1, REEP5, GATA2, GCM1, CGB2, CD164, CYP11A1 
##     TFAP2A, GATA3, FDX1, PHLDA2, STARD10, BACE2, CGB8, VAMP8, PTGIS, PAPOLA 
##     CSGALNACT1, XAGE3, ST8SIA4, PID1, MLLT1, CD55, ARID3A, AVPI1, GPR137, BHLHE40 
## PC_ 2 
## Positive:  ENSMFAG00000037514, ISM2, LGALS3, ESAM, ECM1, INSL4, HSD17B2, NCAM1, TINAGL1, JPT1 
##     PLOD2, SERPING1, TGFB2, SERPINE1, MGLL, RASSF6, SRI, FBN1, ENSMFAG00000004652, LAIR2 
##     ITM2B, PLAC8, HS3ST1, C15orf48, TMEM132A, TPM1, ARHGDIB, CDH5, CORO1A, ITGA1 
## Negative:  FSCN1, RASAL2, RAB11FIP1, TBX3, SLC40A1, KLHL7, PPM1B, RAB8B, FGF18, PTPN21 
##     GADD45G, LGALS16, FGD6, F2RL1, HSPB8, SLC19A1, WNK2, AGAP1, LITAF, NEURL1 
##     CBLB, ZNF728, ID2, APOE, MLLT1, OVOL1, NEDD4L, TEAD4, ENSMFAG00000040354, ST8SIA4 
## PC_ 3 
## Positive:  HGF, COL3A1, EGFLAM, COL6A3, PITX1, COL6A2, MEIS2, CCDC80, COL4A1, HMGA2 
##     COL4A2, COL5A2, ADAM12, ZEB2, TEK, RSPO2, MSRB3, PITX2, TMEM88, F5 
##     SULT1C4, CTHRC1, HAND2, ZFPM2, CREB3L1, COL6A1, TIMP1, ITIH5, NAV3, WNT2 
## Negative:  L1TD1, POU5F1B, KHDC3L, DPPA5, ENSMFAG00000037727, NANOG, IFITM1, OLFM2, PTPRZ1, ZNF208 
##     KCTD8, SEPHS1, ANOS1, ENSMFAG00000003315, DNMT3B, IGFBP2, GABRB3, TDGF1, OOEP, CXCL12 
##     ENO3, SFRP2, TUBB2B, RRAGD, ENSMFAG00000043424, OPCML, ZYG11A, IGFBP6, LRRIQ1, CMBL 
## PC_ 4 
## Positive:  ENSMFAG00000038147, LRP2, NR2F2, CLMP, ANXA8, ADGRL3, PCLAF, KHDRBS3, NRP1, ATP6V1C2 
##     LRPAP1, BCAM, MET, SDK1, KCNMA1, TMC5, MED12L, LAMA1, ATG10, S100A10 
##     RPS6KA4, CCKBR, LYN, PRXL2A, ATP12A, NPM1, KIAA1217, MSX2, XAGE3, TSPAN5 
## Negative:  ENPP1, JAM2, PRG2, SERPINE2, ETS2, LAIR2, QSOX1, CHST11, MMP12, C15orf48 
##     SAT1, LAIR1, MMP2, ESAM, ADGRG6, SERPINE1, ENTPD1, IER3, PECAM1, RTN2 
##     SH3BP4, PPM1N, CALCB, VGLL3, PLPP3, EDNRB, HS3ST1, ITGA2, HLA-E, SEPTIN3 
## PC_ 5 
## Positive:  APOA1, S100A16, ENSMFAG00000045681, FTH1, S100A6, LRP2, S100A14, IHH, PRXL2A, APBB2 
##     FOXA2, KANK4, ENSMFAG00000045359, WWC2, SERPINE2, SOX17, NRG1, ADAM19, LAMA1, SLC9A3R1 
##     SMIM1, F2, CDA, LGR5, APOC3, TM4SF5, TNNC1, HNF4A, LLGL2, SYNE2 
## Negative:  HOPX, CGB7, ETV4, HSPB1, CGB8, ASB2, DPPA4, RBM24, SDC2, KRT23 
##     CGB2, SNRPN, LHB, PDLIM2, CGA, SBSN, NOSIP, SLC2A1, VSIG1, LSAMP 
##     ENSMFAG00000037727, POU5F1B, PLCB1, PTPRZ1, CKMT1B, ENSMFAG00000024252, ATF3, NANOG, HEY1, EPS8L1
VizDimLoadings(Yang, dims = 1:2, reduction = "pca")

DimPlot(Yang, reduction = "pca") + NoLegend()

DimHeatmap(Yang, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(Yang)

library(clustree)

Yang <- FindNeighbors(Yang, dims = 1:10, k=10) 
## Computing nearest neighbor graph
## Computing SNN
Yang <- FindClusters(Yang, resolution = seq(0.1,0.6,0.1), verbose = FALSE)

clustree(Yang)

Yang <- FindClusters(Yang, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15503
## Number of edges: 204772
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9475
## Number of communities: 23
## Elapsed time: 0 seconds
Yang <- RunUMAP(Yang, dims = 1:10)
## 20:05:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:05:08 Read 15503 rows and found 10 numeric columns
## 20:05:08 Using Annoy for neighbor search, n_neighbors = 30
## 20:05:08 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:05:10 Writing NN index file to temp file C:\Users\fxyin\AppData\Local\Temp\Rtmpi6jUzC\filee18788c7b8
## 20:05:10 Searching Annoy index using 1 thread, search_k = 3000
## 20:05:16 Annoy recall = 100%
## 20:05:17 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:05:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 20:05:21 Commencing optimization for 200 epochs, with 614276 positive edges
## 20:05:44 Optimization finished
# individual clusters
DimPlot(Yang, reduction = "umap")

DimPlot(Yang, reduction = "umap", group.by = "stage")

Yang <- AddModuleScore(Yang,
                  features = epi,
                  name="epi_enriched", random.seed = 1)
## Warning: The following features are not present in the object: POU5F1, not
## searching for symbol synonyms
Yang <- AddModuleScore(Yang,
                  features = TE,
                  name="TE_enriched", random.seed = 1)

Yang <- AddModuleScore(Yang,
                  features = PE,
                  name="PE_enriched", random.seed = 1)

Yang <- AddModuleScore(Yang,
                  features = EXMC,
                  name="EXMC_enriched", random.seed = 1)

# Plot scores
DimPlot(Yang, reduction = "umap")

FeaturePlot(Yang,
            features = "epi_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Yang,
            features = "TE_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Yang,
            features = "PE_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(Yang,
            features = "EXMC_enriched1", label = TRUE, repel = TRUE) +
            scale_color_distiller(palette = "RdYlBu")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

####rename the clusters#########################
Yang$anno <- c("NA")
Yang$anno[which(Idents(Yang) %in% c('18'))] <- c("PE")
Yang$anno[which(Idents(Yang) %in% c('2', '19', '22'))] <- c("EXMC")
Yang$anno[which(Idents(Yang) %in% c('1','9','17', '20','21'))] <- c("Epi")
Yang$anno[which(Yang$anno=="NA")] <- c("TE")


DimPlot(Yang, reduction = "umap", group.by = "anno")

###add the anno

saveRDS(Yang, file = "Monkey_Yang_reanno.rds")

raw_list$Yang$anno <- Yang$anno

######plot####################

VlnPlot(raw_list$Yang, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

meta <- raw_list$Yang@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##Zhai, 2022 dataset ##fix gene names

genes <- as.data.frame(rownames(raw_list$Zhai_2022)) 
genes_2 <- genes[grep("ENSMFAG", genes[,1]),,drop=FALSE]
genes.rest <- genes[-grep("ENSMFAG", genes[,1]),,drop=FALSE]

######use both mfascicularis and hsapiens biomart#######

attributes1 <- c("ensembl_gene_id", "external_gene_name", "wikigene_name")
result1 <- getBM(attributes = attributes1,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Zhai_2022)`,
                 mart = dataset)

# Query for ensembl_gene_id, hsapiens_homolog_ensembl_gene, and hsapiens_homolog_associated_gene_name
attributes2 <- c("ensembl_gene_id", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name")
result2 <- getBM(attributes = attributes2,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Zhai_2022)`,
                 mart = dataset)

result3 <- getBM(attributes = attributes1,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Zhai_2022)`,
                      mart = dataset2)

# Query Macaca fascicularis genes and their human homologs
result4 <- getBM(attributes = attributes2,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Zhai_2022)`,
                      mart = dataset2)

# Merge result1 and result2
merged_result <- merge(result1, result2, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result3, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result4, by = "ensembl_gene_id", all = TRUE)


# Define a function to assign gene names based on column priority
assign_gene_name <- function(row) {
  # Priority order of columns
  priority_columns <- c("external_gene_name.x", 
                        "hsapiens_homolog_associated_gene_name.x", 
                        "external_gene_name.y", 
                        "hsapiens_homolog_associated_gene_name.y")
  
  # Iterate over the columns and return the first non-NA value
  for (col in priority_columns) {
    if (!is.na(row[[col]]) && row[[col]] != "") {
      return(row[[col]])
    }
  }
  return(NA) # Return NA if all values are missing or empty
}

# Apply the function to each row in the data frame
merged_result$assigned_gene_name <- apply(merged_result, 1, assign_gene_name)

# Remove duplicates based on 'ensembl_gene_id', keeping the first occurrence
merged_result <- merged_result[!duplicated(merged_result$ensembl_gene_id), ]

merged_result <- merged_result[,c("ensembl_gene_id","assigned_gene_name")]

merged_result <- merged_result[match(genes_2$`rownames(raw_list$Zhai_2022)`,merged_result$ensembl_gene_id ),]

genes_2$genename <- merged_result$assigned_gene_name

genes.rest$genename <- genes.rest$`rownames(raw_list$Zhai_2022)`

gene.all <- rbind(genes_2,genes.rest )



#check again
any(duplicated(gene.all$genename )) 
## [1] TRUE
# Step 1: Identify all duplicated gene names
duplicated_indices <- duplicated(gene.all$genename) | duplicated(gene.all$genename, fromLast = TRUE)

# Step 2: Replace all occurrences of duplicated gene names with corresponding row names
gene.all$genename[duplicated_indices] <- gene.all$`rownames(raw_list$Zhai_2022)`[duplicated_indices]

# Step 3: Check for remaining duplicates
any(duplicated(gene.all$genename))
## [1] FALSE
gene.all <- gene.all[match(rownames(raw_list$Zhai_2022),gene.all$`rownames(raw_list$Zhai_2022)` ),]

##check again
rownames(raw_list$Zhai_2022)[1:5]
## [1] "PGBD2"   "ZNF692"  "ZNF672"  "SH3BP5L" "LYPD8"
rownames(raw_list$Zhai_2022) <-gene.all$genename
zhai_2022_anno <- read.csv("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/monkey-Zhai,2019/MFE56636-meta.csv", header = TRUE, sep = ",") 

##check colnames
colnames(raw_list$Zhai_2022)[1:5]
## [1] "AAACCCAAGACGGTCA-1_1_1_1" "AAACCCACAGTAACAA-1_1_1_1"
## [3] "AAACCCAGTACTCAAC-1_1_1_1" "AAACCCAGTATAGGAT-1_1_1_1"
## [5] "AAACCCAGTTCCCACT-1_1_1_1"
library(stringr)

df <- as.data.frame(str_split_fixed( zhai_2022_anno$X, "_", 2))
df$V3 <- as.data.frame(str_split_fixed( df$V2, "-", 2))[2]
table(df$V1, df$V3$V2)
##         
##              1     2     3     4     5     6     7
##   E20-E1  3559     0     0     0     0     0     0
##   E20-E2     0  8070     0     0     0     0     0
##   E23-E1     0     0  9073     0     0     0     0
##   E23-E2     0     0     0  8752     0     0     0
##   E26-E1     0     0     0     0  9726     0     0
##   E26-Y1     0     0     0     0     0  5318     0
##   E29-E1     0     0     0     0     0     0 12138
cells <- raw_list$Zhai_2022@meta.data[,c(1,4,7)]
cells$id <- rownames(cells)
cells$code <- as.data.frame(str_split_fixed(cells$id, "-", 2))[1]

#check
unique(cells$stage)
## [1] "CS8-e1"   "CS8-e2"   "CS9-e1"   "CS9-e2"   "CS11-e1"  "CS11-ys1" "CS11-e2"
cells$code2 <- "NA"
cells$code2[which(cells$stage=="CS8-e1")] <- paste0(cells$code$V1, "-1")[which(cells$stage=="CS8-e1")]
cells$code2[which(cells$stage=="CS8-e2")] <- paste0(cells$code$V1, "-2")[which(cells$stage=="CS8-e2")]
cells$code2[which(cells$stage=="CS9-e1")] <- paste0(cells$code$V1, "-3")[which(cells$stage=="CS9-e1")]
cells$code2[which(cells$stage=="CS9-e2")] <- paste0(cells$code$V1, "-4")[which(cells$stage=="CS9-e2")]
cells$code2[which(cells$stage=="CS11-e1")] <- paste0(cells$code$V1, "-5")[which(cells$stage=="CS11-e1")]
cells$code2[which(cells$stage=="CS11-ys1")] <- paste0(cells$code$V1, "-6")[which(cells$stage=="CS11-ys1")]
cells$code2[which(cells$stage=="CS11-e2")] <- paste0(cells$code$V1, "-7")[which(cells$stage=="CS11-e2")]

length(which(df$V2 %in% cells$code2))
## [1] 55335
#check which cells were not found
df2 <- df[-which(df$V2 %in% cells$code2),]

##match cell annotations

df2 <- zhai_2022_anno[,c(1,4,5,8)]


df2$code2 <- as.data.frame(str_split_fixed( zhai_2022_anno$X, "_", 2))[,2]


cells <- cells[,c(1,2,6)]
colnames(cells) <- c("dataset", "group", "code2")

meta <- merge(cells, df2, by="code2",all=TRUE )

meta <- meta[which(cells$code2 %in% meta$code2),]

meta <- meta[match(cells$code2, meta$code2),]

meta$cell_type[which(is.na(meta$cell_type))] <- "NA"

raw_list$Zhai_2022$anno <- meta$cell_type

#########plot###################

VlnPlot(raw_list$Zhai_2022, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

meta <- raw_list$Zhai_2022@meta.data

df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##Gong dataset

genes <- as.data.frame(rownames(raw_list$Gong)) 
genes_2 <- genes[grep("ENSMFAG", genes[,1]),,drop=FALSE]
genes.rest <- genes[-grep("ENSMFAG", genes[,1]),,drop=FALSE]

######use both mfascicularis and hsapiens biomart#######

attributes1 <- c("ensembl_gene_id", "external_gene_name", "wikigene_name")
result1 <- getBM(attributes = attributes1,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Gong)`,
                 mart = dataset)

# Query for ensembl_gene_id, hsapiens_homolog_ensembl_gene, and hsapiens_homolog_associated_gene_name
attributes2 <- c("ensembl_gene_id", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name")
result2 <- getBM(attributes = attributes2,
                 filters = "ensembl_gene_id",
                 values = genes_2$`rownames(raw_list$Gong)`,
                 mart = dataset)

result3 <- getBM(attributes = attributes1,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Gong)`,
                      mart = dataset2)

# Query Macaca fascicularis genes and their human homologs
result4 <- getBM(attributes = attributes2,
                      filters = "ensembl_gene_id",
                      values = genes_2$`rownames(raw_list$Gong)`,
                      mart = dataset2)

# Merge result1 and result2
merged_result <- merge(result1, result2, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result3, by = "ensembl_gene_id", all = TRUE)
merged_result <- merge(merged_result, result4, by = "ensembl_gene_id", all = TRUE)


# Define a function to assign gene names based on column priority
assign_gene_name <- function(row) {
  # Priority order of columns
  priority_columns <- c("external_gene_name.x", 
                        "hsapiens_homolog_associated_gene_name.x", 
                        "external_gene_name.y", 
                        "hsapiens_homolog_associated_gene_name.y")
  
  # Iterate over the columns and return the first non-NA value
  for (col in priority_columns) {
    if (!is.na(row[[col]]) && row[[col]] != "") {
      return(row[[col]])
    }
  }
  return(NA) # Return NA if all values are missing or empty
}

# Apply the function to each row in the data frame
merged_result$assigned_gene_name <- apply(merged_result, 1, assign_gene_name)

# Remove duplicates based on 'ensembl_gene_id', keeping the first occurrence
merged_result <- merged_result[!duplicated(merged_result$ensembl_gene_id), ]

merged_result <- merged_result[,c("ensembl_gene_id","assigned_gene_name")]


merged_result <- merged_result[match(genes_2$`rownames(raw_list$Gong)`,merged_result$ensembl_gene_id ),]

genes_2$genename <- merged_result$assigned_gene_name

genes.rest$genename <- genes.rest$`rownames(raw_list$Gong)`

gene.all <- rbind(genes_2,genes.rest )


#check again
any(duplicated(gene.all$genename )) 
## [1] TRUE
# Step 1: Identify all duplicated gene names
duplicated_indices <- duplicated(gene.all$genename) | duplicated(gene.all$genename, fromLast = TRUE)

# Step 2: Replace all occurrences of duplicated gene names with corresponding row names
gene.all$genename[duplicated_indices] <- gene.all$`rownames(raw_list$Gong)`[duplicated_indices]

# Step 3: Check for remaining duplicates
any(duplicated(gene.all$genename))
## [1] FALSE
gene.all <- gene.all[match(rownames(raw_list$Gong),gene.all$`rownames(raw_list$Gong)`),]

##check again
rownames(raw_list$Gong)[1:5]
## [1] "PGBD2"   "ZNF692"  "ZNF672"  "SH3BP5L" "LYPD8"
rownames(raw_list$Gong) <-gene.all$genename

##Gong dataset

#load R workspace
workspace_file <- "D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/Monkey-Gong/Figure2_Allcells.Rdata" 
# Load the workspace
load(workspace_file)

colnames(Vitro)[1:5]
## [1] "AAACCCAAGGCTGAAC-1_1" "AAACCCATCGGAGTAG-1_1" "AAACCCATCGTGCGAC-1_1"
## [4] "AAACGAAGTCCACAGC-1_1" "AAACGCTGTTGTGGAG-1_1"
df <- Vitro@meta.data

df2 <- raw_list$Gong@meta.data
df2$id <- rownames(df2)


y1 <- as.data.frame(str_split_fixed(rownames(df), "_", 4))
unique(y1$V2)
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "17" "18"
unique(y1$V3)
##  [1] ""   "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "23" "24"
unique(y1$V4)
## [1] ""   "21" "22"
unique(df$group)
##  [1] "ME20b"   "ME22b1"  "ME22b2"  "ME23b1"  "ME23b2"  "ME23b3"  "ME24b1" 
##  [8] "ME24b2"  "ME25b1"  "ME25b2"  "ME18"    "ME19"    "ME20"    "ME21"   
## [15] "ME22"    "ME23"    "ME24"    "ME25"    "ME18b_1" "ME18b_2" "ME19b_1"
## [22] "ME19b_2" "ME18C_1" "ME21C_1"
y1$id <- y1$V2
y1$id[which(y1$V3!="")] <- y1$V3[which(y1$V3!="")] 
y1$id[which(y1$V4!="")] <- y1$V4[which(y1$V4!="")] 
y1$group <- df$group
y1$cluster <- df$Cluster

table(y1$id, y1$group)
##     
##      ME18 ME18b_1 ME18b_2 ME18C_1 ME19 ME19b_1 ME19b_2 ME20 ME20b ME21 ME21C_1
##   1     0       0       0       0    0       0       0    0  2443    0       0
##   10    0       0       0       0    0       0       0    0     0    0       0
##   11 3759       0       0       0    0       0       0    0     0    0       0
##   12    0       0       0       0 3769       0       0    0     0    0       0
##   13    0       0       0       0    0       0       0 3979     0    0       0
##   14    0       0       0       0    0       0       0    0     0 1453       0
##   15    0       0       0       0    0       0       0    0     0    0       0
##   16    0       0       0       0    0       0       0    0     0    0       0
##   17    0       0       0       0    0       0       0    0     0    0       0
##   18    0       0       0       0    0       0       0    0     0    0       0
##   19    0    1211       0       0    0       0       0    0     0    0       0
##   2     0       0       0       0    0       0       0    0     0    0       0
##   20    0       0     874       0    0       0       0    0     0    0       0
##   21    0       0       0       0    0    2843       0    0     0    0       0
##   22    0       0       0       0    0       0    3990    0     0    0       0
##   23    0       0       0     899    0       0       0    0     0    0       0
##   24    0       0       0       0    0       0       0    0     0    0    2401
##   3     0       0       0       0    0       0       0    0     0    0       0
##   4     0       0       0       0    0       0       0    0     0    0       0
##   5     0       0       0       0    0       0       0    0     0    0       0
##   6     0       0       0       0    0       0       0    0     0    0       0
##   7     0       0       0       0    0       0       0    0     0    0       0
##   8     0       0       0       0    0       0       0    0     0    0       0
##   9     0       0       0       0    0       0       0    0     0    0       0
##     
##      ME22 ME22b1 ME22b2 ME23 ME23b1 ME23b2 ME23b3 ME24 ME24b1 ME24b2 ME25
##   1     0      0      0    0      0      0      0    0      0      0    0
##   10    0      0      0    0      0      0      0    0      0      0    0
##   11    0      0      0    0      0      0      0    0      0      0    0
##   12    0      0      0    0      0      0      0    0      0      0    0
##   13    0      0      0    0      0      0      0    0      0      0    0
##   14    0      0      0    0      0      0      0    0      0      0    0
##   15 2503      0      0    0      0      0      0    0      0      0    0
##   16    0      0      0 2556      0      0      0    0      0      0    0
##   17    0      0      0    0      0      0      0 5490      0      0    0
##   18    0      0      0    0      0      0      0    0      0      0 3209
##   19    0      0      0    0      0      0      0    0      0      0    0
##   2     0   1975      0    0      0      0      0    0      0      0    0
##   20    0      0      0    0      0      0      0    0      0      0    0
##   21    0      0      0    0      0      0      0    0      0      0    0
##   22    0      0      0    0      0      0      0    0      0      0    0
##   23    0      0      0    0      0      0      0    0      0      0    0
##   24    0      0      0    0      0      0      0    0      0      0    0
##   3     0      0   1997    0      0      0      0    0      0      0    0
##   4     0      0      0    0   5591      0      0    0      0      0    0
##   5     0      0      0    0      0   6053      0    0      0      0    0
##   6     0      0      0    0      0      0   5459    0      0      0    0
##   7     0      0      0    0      0      0      0    0   3212      0    0
##   8     0      0      0    0      0      0      0    0      0   2185    0
##   9     0      0      0    0      0      0      0    0      0      0    0
##     
##      ME25b1 ME25b2
##   1       0      0
##   10      0   3311
##   11      0      0
##   12      0      0
##   13      0      0
##   14      0      0
##   15      0      0
##   16      0      0
##   17      0      0
##   18      0      0
##   19      0      0
##   2       0      0
##   20      0      0
##   21      0      0
##   22      0      0
##   23      0      0
##   24      0      0
##   3       0      0
##   4       0      0
##   5       0      0
##   6       0      0
##   7       0      0
##   8       0      0
##   9    3311      0
any(duplicated(y1$V1))
## [1] TRUE
y2 <- as.data.frame(str_split_fixed(rownames(df2), "_", 2))

unique(y2$V2)
##  [1] "1_1_1_1_1_1_1_1_1_1_1_1" "2_1_1_1_1_1_1_1_1_1_1_1"
##  [3] "1_1_1_1_1_1_1_1_1_1_1"   "2_1_1_1_1_1_1_1_1_1_1"  
##  [5] "1_1_1_1_1_1_1_1_1_1"     "2_1_1_1_1_1_1_1_1_1"    
##  [7] "1_1_1_1_1_1_1_1_1"       "2_1_1_1_1_1_1_1_1"      
##  [9] "1_1_1_1_1_1_1_1"         "2_1_1_1_1_1_1_1"        
## [11] "1_1_1_1_1_1_1"           "2_1_1_1_1_1_1"          
## [13] "1_1_1_1_1_1"             "2_1_1_1_1_1"            
## [15] "1_1_1_1_1"               "2_1_1_1_1"              
## [17] "1_1_1_1"                 "2_1_1_1"                
## [19] "1_1_1"                   "2_1_1"                  
## [21] "1_1"                     "2_1"                    
## [23] "1"                       "2"
y2$group <- "NA"

y2$group[which(y2$V2=="1_1_1_1_1_1_1_1_1_1_1_1")] <- "ME18"
y2$group[which(y2$V2=="2_1_1_1_1_1_1_1_1_1_1_1")] <- "ME23b3"
y2$group[which(y2$V2=="1_1_1_1_1_1_1_1_1_1_1")] <- "ME23b2"
y2$group[which(y2$V2=="2_1_1_1_1_1_1_1_1_1_1")] <- "ME23b1"
y2$group[which(y2$V2=="1_1_1_1_1_1_1_1_1_1")] <- "ME23"
y2$group[which(y2$V2=="2_1_1_1_1_1_1_1_1_1")] <- "ME22b2"
y2$group[which(y2$V2=="1_1_1_1_1_1_1_1_1")] <- "ME22b1"
y2$group[which(y2$V2=="2_1_1_1_1_1_1_1_1")] <- "ME22"
y2$group[which(y2$V2=="1_1_1_1_1_1_1_1")] <- "ME21C_1"

y2$group[which(y2$V2=="2_1_1_1_1_1_1_1")] <- "ME21"
y2$group[which(y2$V2=="1_1_1_1_1_1_1")] <- "ME20b"
y2$group[which(y2$V2=="2_1_1_1_1_1_1")] <- "ME20"
y2$group[which(y2$V2=="1_1_1_1_1_1")] <- "ME19b_2"
y2$group[which(y2$V2=="2_1_1_1_1_1")] <- "ME19b_1"
y2$group[which(y2$V2=="1_1_1_1_1")] <- "ME19"
y2$group[which(y2$V2=="2_1_1_1_1")] <- "ME18C_1"
y2$group[which(y2$V2=="1_1_1_1")] <- "ME18b_2"
y2$group[which(y2$V2=="2_1_1_1")] <- "ME25b2"
y2$group[which(y2$V2=="1_1_1")] <- "ME25b1"
y2$group[which(y2$V2=="2_1_1")] <- "ME25"
y2$group[which(y2$V2=="1_1")] <- "ME24b2"
y2$group[which(y2$V2=="2_1")] <- "ME24b1"
y2$group[which(y2$V2=="1")] <- "ME24"
y2$group[which(y2$V2=="2")] <- "ME18b_1"

y2$id2 <- paste0(y2$V1,"_",y2$group)
y1$id2 <- paste0(y1$V1,"_",y1$group)

length(which(y2$id2 %in% y1$id2))
## [1] 74466
y2$rownames <- rownames(df2)

meta <- merge(y2, y1, by="id2", all=TRUE)
meta <- meta[which(meta$rownames %in% rownames(df2)),]
meta <- meta[,c(5,11,12)]

meta <- meta[match(colnames(raw_list$Gong) ,meta$rownames),]
meta$group.y[which(is.na(meta$group.y))] <- "NA"
meta$cluster[which(is.na(meta$cluster))] <- "NA"

raw_list$Gong$anno <- meta$cluster
raw_list$Gong$embryo <- meta$group.y

rm(Vitro)

###################################

VlnPlot(raw_list$Gong, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

meta <- raw_list$Gong@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

#remove NAs
df <- raw_list$Gong@meta.data
table(df$anno)
## 
##     Allantois            AM        Amnion         Blood        Border 
##          9906            66          3765          1015          1976 
## Cardiomyocyte           CTB            DE          Endo           EPI 
##          3651          8626           430          1925           982 
##     Epidermis           EVT           EXM       Foregut       Hindgut 
##          3695          2958          2630           429           381 
##           LPM           Mes            NA   NeuralPlate          ParE 
##          9843          6740         59556           114           293 
##           PGC            PM            PS           STB            VE 
##          1101           201           996          5329          1043 
##        ysMeso 
##          6371
cells_to_keep <- colnames(raw_list$Gong)[df$anno!="NA"]

raw_list$Gong<- subset(raw_list$Gong, cells = cells_to_keep)

##Zhai_2023 use provided count matrix

###Zhai_2023

Zhai_2023_paper <- read.table("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/monkey-Zhai,2023/GSE218523_counts.csv/GSE218523_counts.csv", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

colnames(Zhai_2023_paper)[1:5]
## [1] "gene"                 "X20210303.YHY.1_sc1"  "X20210303.YHY.1_sc10"
## [4] "X20210303.YHY.1_sc11" "X20210303.YHY.1_sc12"
rownames(Zhai_2023_paper)[1:5]
## [1] "1" "2" "3" "4" "5"
rownames(Zhai_2023_paper) <- Zhai_2023_paper[,1]
Zhai_2023_paper <- Zhai_2023_paper[,-1,drop=FALSE]

anno2 <- read.csv("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/data/monkey-Zhai,2023/GSE218523_meta.csv", sep='\t') 

Zhai_2023_paper <- CreateSeuratObject(counts = Zhai_2023_paper, project = "Zhai_2023_paper")
## Warning: Data is of class data.frame. Coercing to dgCMatrix.
Zhai_2023_paper[["percent.mt"]] <- PercentageFeatureSet(Zhai_2023_paper, pattern = "^MT.")
Idents(Zhai_2023_paper) <- "Zhai_2023"
VlnPlot(Zhai_2023_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

anno2 <- anno2[match( colnames(Zhai_2023_paper), anno2$cells_id),]

Zhai_2023_paper$stage <- anno2$stage
Zhai_2023_paper$orig.ident <- "Zhai_2023"
Zhai_2023_paper$species <- "Macaca fascicularis"
Zhai_2023_paper$embryo <- anno2$embroy2
Zhai_2023_paper$platform <- "scChaRM-seq"
Zhai_2023_paper$anno <- anno2$celltype

raw_list$Zhai_2023 <- Zhai_2023_paper

names(raw_list)[8] <- "Zhai_2023_paper"


raw_list$Zhai_2023_paper$anno <- as.data.frame(str_split_fixed(raw_list$Zhai_2023_paper$anno, " ", 2))[,2]


VlnPlot(raw_list$Zhai_2023_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#filter cells with thresholds
metadata <- raw_list$Zhai_2023_paper@meta.data
cells_to_keep <- colnames(raw_list$Zhai_2023_paper)[metadata$nFeature_RNA > 500 & metadata$nCount_RNA > 1000 & metadata$percent.mt < 15]
raw_list$Zhai_2023_paper <- subset(raw_list$Zhai_2023_paper, cells = cells_to_keep)

VlnPlot(raw_list$Zhai_2023_paper, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

meta <- raw_list$Zhai_2023_paper@meta.data
df2 <- as.data.frame(table(meta$stage, meta$anno))

##plot###n_cells before QC
ggplot(data=df2, aes(x=Var1, y=Freq, fill=Var2)) +
    geom_bar(stat="identity")+ 
  theme(text=element_text(size=30))

##data correction

#check meta data
for (i in 1:length(raw_list)) {
  cat("Column names of dataset",names(raw_list[i]) , ":\n")
  print(colnames(raw_list[[i]]@meta.data))
  cat("\n")  # Print a newline for better readability between datasets
}
## Column names of dataset Sasaki_paper :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "percent.mt"   "stage"       
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Nakamura_paper :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "percent.mt"   "stage"       
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Zhai_2022 :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "stage"        "percent.mt"  
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Ma_paper :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "stage"        "anno"        
## [6] "species"      "embryo"       "platform"     "percent.mt"  
## 
## Column names of dataset Niu_paper :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "percent.mt"   "stage"       
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Yang :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "stage"        "percent.mt"  
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Gong :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "stage"        "percent.mt"  
## [6] "species"      "embryo"       "platform"     "anno"        
## 
## Column names of dataset Zhai_2023_paper :
## [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "percent.mt"   "stage"       
## [6] "species"      "embryo"       "platform"     "anno"
newlist <- c("orig.ident","nCount_RNA","nFeature_RNA","sample","stage", "percent.mt","species" , "embryo", "platform", "anno","anno_sub", "anno_harmo" )

# Iterate over each dataset in raw_list
for (i in 1:length(raw_list)) {
  # Identify missing columns
  missing_columns <- setdiff(newlist, colnames(raw_list[[i]]@meta.data))
  
  # Add missing columns with NA values
  for (col in missing_columns) {
    raw_list[[i]]@meta.data[[col]] <- NA
  }
  
  # Reorder columns to match the desired order
  raw_list[[i]]@meta.data <- raw_list[[i]]@meta.data[, newlist, drop = FALSE]
}

# Check the result by printing the column names of each dataset
for (i in 1:length(raw_list)) {
  cat("Column names of unified dataset", names(raw_list[i]), ":\n")
  print(colnames(raw_list[[i]]@meta.data))
  cat("\n")  # Print a newline for better readability between datasets
}
## Column names of unified dataset Sasaki_paper :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Nakamura_paper :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Zhai_2022 :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Ma_paper :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Niu_paper :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Yang :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Gong :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"  
## 
## Column names of unified dataset Zhai_2023_paper :
##  [1] "orig.ident"   "nCount_RNA"   "nFeature_RNA" "sample"       "stage"       
##  [6] "percent.mt"   "species"      "embryo"       "platform"     "anno"        
## [11] "anno_sub"     "anno_harmo"
##correct stage names
#Nakamura
raw_list$Nakamura_paper$stage <- as.character(raw_list$Nakamura_paper$stage)

raw_list$Nakamura_paper$stage[raw_list$Nakamura_paper$stage == "E06"] <- "E6"
raw_list$Nakamura_paper$stage[raw_list$Nakamura_paper$stage == "E07"] <- "E7"
raw_list$Nakamura_paper$stage[raw_list$Nakamura_paper$stage == "E08"] <- "E8"
raw_list$Nakamura_paper$stage[raw_list$Nakamura_paper$stage == "E09"] <- "E9"

raw_list$Nakamura_paper$stage <- factor(raw_list$Nakamura_paper$stage)

#Zhai_2022
raw_list$Zhai_2022$embryo <- raw_list$Zhai_2022$stage
raw_list$Zhai_2022$stage[raw_list$Zhai_2022$stage %in% c("CS11-e1","CS11-e2","CS11-ys1")   ] <- "CS11"
raw_list$Zhai_2022$stage[raw_list$Zhai_2022$stage %in% c("CS8-e1","CS8-e2")   ] <- "CS8"
raw_list$Zhai_2022$stage[raw_list$Zhai_2022$stage %in% c("CS9-e1","CS9-e2")   ] <- "CS9"

#Ma_paper
raw_list$Ma_paper$stage <- gsub("d.p.f.","E", raw_list$Ma_paper$stage )

#Niu_paper
raw_list$Niu_paper$stage <- gsub("E09","E9", raw_list$Niu_paper$stage )

#Yang_paper
raw_list$Yang$stage <-  gsub("D", "E", raw_list$Yang$stage )


##add IVC label
raw_list$Ma_paper$stage <- paste(raw_list$Ma_paper$stage, "_IVC",sep = "")
raw_list$Niu_paper$stage <- paste(raw_list$Niu_paper$stage, "_IVC",sep = "")
raw_list$Yang$stage <- paste(raw_list$Yang$stage , "_IVC",sep = "")
raw_list$Zhai_2023_paper$stage <- paste(raw_list$Zhai_2023_paper$stage , "_IVC",sep = "")
raw_list$Gong$stage <- paste(raw_list$Gong$stage , "_IVC",sep = "")

#print out age
age <- do.call(rbind, lapply(1:length(raw_list), function(x) {
  
df <- as.data.frame(table(raw_list[[x]]$stage) ) 
 df$dataset <- names(raw_list[x])
  
 return(df)
}))

#check table
print(age)
##       Var1  Freq         dataset
## 1      E13     3    Sasaki_paper
## 2      E14     1    Sasaki_paper
## 3      E16    11    Sasaki_paper
## 4      E17     2    Sasaki_paper
## 5      E20     8    Sasaki_paper
## 6      E13    46  Nakamura_paper
## 7      E14    62  Nakamura_paper
## 8      E16    36  Nakamura_paper
## 9      E17    53  Nakamura_paper
## 10      E6    43  Nakamura_paper
## 11      E7    35  Nakamura_paper
## 12      E8    95  Nakamura_paper
## 13      E9    20  Nakamura_paper
## 14    CS11 31314       Zhai_2022
## 15     CS8 14286       Zhai_2022
## 16     CS9 18975       Zhai_2022
## 17 E11_IVC   154        Ma_paper
## 18 E12_IVC   249        Ma_paper
## 19 E13_IVC   162        Ma_paper
## 20 E14_IVC   322        Ma_paper
## 21 E16_IVC   336        Ma_paper
## 22 E17_IVC   230        Ma_paper
## 23 E11_IVC    57       Niu_paper
## 24 E13_IVC    96       Niu_paper
## 25 E15_IVC   121       Niu_paper
## 26 E17_IVC   140       Niu_paper
## 27 E19_IVC    66       Niu_paper
## 28 E20_IVC    84       Niu_paper
## 29  E9_IVC    36       Niu_paper
## 30 E10_IVC  2847            Yang
## 31 E12_IVC  2919            Yang
## 32 E14_IVC  9737            Yang
## 33 E18_IVC  6742            Gong
## 34 E19_IVC 10602            Gong
## 35 E20_IVC  6422            Gong
## 36 E21_IVC  3854            Gong
## 37 E22_IVC  6475            Gong
## 38 E23_IVC 19659            Gong
## 39 E24_IVC 10881            Gong
## 40 E25_IVC  9831            Gong
## 41 E16_IVC   480 Zhai_2023_paper
## 42 E18_IVC   375 Zhai_2023_paper
## 43 E20_IVC   516 Zhai_2023_paper
## 44 E21_IVC   496 Zhai_2023_paper
## 45 E22_IVC   529 Zhai_2023_paper
## 46 E23_IVC   547 Zhai_2023_paper
## 47 E24_IVC   280 Zhai_2023_paper
## 48 E25_IVC   457 Zhai_2023_paper

##organize label metadata

#####Sasaki_paper#############
unique(raw_list$Sasaki_paper$anno)
## [1] "Gast" "ePGC"
raw_list$Sasaki_paper$anno_harmo <- raw_list$Sasaki_paper$anno
raw_list$Sasaki_paper$anno_harmo[which(raw_list$Sasaki_paper$anno_harmo=="Gast")] <- "Primitive.streak"
raw_list$Sasaki_paper$anno_harmo[which(raw_list$Sasaki_paper$anno_harmo=="ePGC")] <- "PGC"

unique(raw_list$Sasaki_paper$anno_harmo)
## [1] "Primitive.streak" "PGC"
#####Nakamura_paper#############
unique(raw_list$Nakamura_paper$anno)
##  [1] "EXMC"      "Gast1"     "Gast2a"    "Gast2b"    "Hypoblast" "ICM"      
##  [7] "Post-paTE" "PostE-EPI" "PostL-EPI" "Pre-EPI"   "PreE-TE"   "PreL-TE"  
## [13] "VE/YE"
raw_list$Nakamura_paper$anno_harmo <- raw_list$Nakamura_paper$anno
raw_list$Nakamura_paper$anno_harmo[grep("Gast", raw_list$Nakamura_paper$anno_harmo)] <- "Primitive.streak"
raw_list$Nakamura_paper$anno_harmo[which(raw_list$Nakamura_paper$anno_harmo=="EXMC")] <- "ExE.Mesoderm"
raw_list$Nakamura_paper$anno_harmo[grep("TE", raw_list$Nakamura_paper$anno)] <- "TE/TrB"
raw_list$Nakamura_paper$anno_harmo[grep("EPI", raw_list$Nakamura_paper$anno)] <- "Epiblast"
raw_list$Nakamura_paper$anno_harmo[grep("VE/YE", raw_list$Nakamura_paper$anno)] <- "ExE.Endoderm"

#####Zhai_2022#############
unique(raw_list$Zhai_2022$anno)
##  [1] "Nas.Meso"    "EPI"         "Gut"         "ECT"         "NA"         
##  [6] "PGC"         "Mes"         "VE"          "ys.Endo1"    "APS"        
## [11] "AM"          "LP.Meso"     "AI"          "ys.Endo2"    "EC"         
## [16] "DE"          "ExE.Meso"    "ys.Meso2"    "SE2"         "SE1"        
## [21] "Pharyg.Meso" "ys.Meso1"    "PS"          "Caud.Meso"   "Node"       
## [26] "NC"          "BP"          "Ery2"        "Ery1"        "Mac"        
## [31] "Para.Meso"   "SC"          "FB/MB/HB"    "Cardi.Meso"  "Rostr.Meso" 
## [36] "NMP"         "Cardi. "     "Inter.Meso"  "PSM"
raw_list$Zhai_2022$anno_harmo <- raw_list$Zhai_2022$anno
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno=="EPI")] <- "Epiblast"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("Gut", "DE")  )] <- "Endoderm"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("ECT", "AM","SE1", "SE2")  )] <- "Non-Neural.Ectoderm"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("NC", "SC","FB/MB/HB")  )] <- "Neural.Ectoderm"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno=="PGC")] <- "PGC"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("VE", "ys.Endo1","ys.Endo2")  )] <- "ExE.Endoderm"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("APS", "PS","NMP")  )] <- "Primitive.streak"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno=="Node")] <- "Node"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("BP","Ery2", "Ery1","Mac","EC")  )] <- "Hemogenic.Endothelial.Progenitors"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("ys.Meso1","ys.Meso2","AI","ExE.Meso")  )] <- "ExE.Mesoderm"

raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno %in% c("Mes","Nas.Meso", "LP.Meso","Pharyg.Meso","Caud.Meso", "Para.Meso", "Cardi.Meso","Rostr.Meso", "Cardi. ","Inter.Meso","PSM")  )] <- "Mesoderm"
raw_list$Zhai_2022$anno_harmo[which(raw_list$Zhai_2022$anno=="NA")] <- "Unknown"


##Ma_paper######
unique(raw_list$Ma_paper$anno)
##  [1] "TE and derivatives" "postE-EPI"          "EXMC"              
##  [4] "VE/YE"              "E-Gast"             "E-AM"              
##  [7] "L-Gast1"            "L-Gast2"            "L-AM2"             
## [10] "postL-EPI"          "L-AM1"              "E-PGC"
raw_list$Ma_paper$anno_harmo <- raw_list$Ma_paper$anno
raw_list$Ma_paper$anno_harmo[which(raw_list$Ma_paper$anno=="TE and derivatives")] <- "TE/TrB"
raw_list$Ma_paper$anno_harmo[grep("EPI", raw_list$Ma_paper$anno)] <- "Epiblast"
raw_list$Ma_paper$anno_harmo[which(raw_list$Ma_paper$anno=="EXMC")] <- "ExE.Mesoderm"
raw_list$Ma_paper$anno_harmo[which(raw_list$Ma_paper$anno=="VE/YE")] <- "ExE.Endoderm"
raw_list$Ma_paper$anno_harmo[grep("Gast", raw_list$Ma_paper$anno)] <- "Primitive.streak"
raw_list$Ma_paper$anno_harmo[which(raw_list$Ma_paper$anno %in% c("E-AM","L-AM2","L-AM1") )] <- "Non-Neural.Ectoderm"
raw_list$Ma_paper$anno_harmo[which(raw_list$Ma_paper$anno=="E-PGC")] <- "PGC"


##Niu_paper####
unique(raw_list$Niu_paper$anno)
## [1] "NA"   "PE"   "Epi"  "EXMC" "TE"
raw_list$Niu_paper$anno_harmo <- raw_list$Niu_paper$anno
raw_list$Niu_paper$anno_harmo[which(raw_list$Niu_paper$anno=="PE")] <- "Hypoblast"
raw_list$Niu_paper$anno_harmo[which(raw_list$Niu_paper$anno=="Epi")] <- "Epiblast"
raw_list$Niu_paper$anno_harmo[which(raw_list$Niu_paper$anno=="EXMC")] <- "ExE.Mesoderm"
raw_list$Niu_paper$anno_harmo[which(raw_list$Niu_paper$anno=="TE")] <- "TE/TrB"
raw_list$Niu_paper$anno_harmo[which(raw_list$Niu_paper$anno=="NA")] <- "Unknown"


##Yang####
unique(raw_list$Yang$anno)
## [1] "PE"   "TE"   "Epi"  "EXMC"
raw_list$Yang$anno_harmo <- raw_list$Yang$anno
raw_list$Yang$anno_harmo[which(raw_list$Yang$anno=="PE")] <- "Hypoblast"
raw_list$Yang$anno_harmo[which(raw_list$Yang$anno=="TE")] <- "TE/TrB"
raw_list$Yang$anno_harmo[which(raw_list$Yang$anno=="Epi")] <- "Epiblast"
raw_list$Yang$anno_harmo[which(raw_list$Yang$anno=="EXMC")] <- "ExE.Mesoderm"

##Zhai_2023_paper###
unique(raw_list$Zhai_2023_paper$anno)
##  [1] "PS and derivative"       "EXMC"                   
##  [3] "EPI"                     "Extraembryonic Mesoderm"
##  [5] "EVT"                     "Mesoderm"               
##  [7] "Endoderm"                "STB"                    
##  [9] "CTB"                     "Non-neural ectoderm"    
## [11] "Blood cells"             "Neural ectoderm"
raw_list$Zhai_2023_paper$anno_harmo <- raw_list$Zhai_2023_paper$anno
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="PS and derivative")] <- "Primitive.streak"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="EXMC")] <- "ExE.Mesoderm"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="EPI")] <- "Epiblast"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Extraembryonic Mesoderm")] <- "ExE.Mesoderm"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno %in% c("EVT", "STB", "CTB"))] <- "TE/TrB"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Non-neural ectoderm")] <- "Non-Neural.Ectoderm"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Blood cells")] <- "Hemogenic.Endothelial.Progenitors"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Neural ectoderm")] <- "Neural.Ectoderm"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Endoderm")] <- "Endoderm"
raw_list$Zhai_2023_paper$anno_harmo[which(raw_list$Zhai_2023_paper$anno=="Mesoderm")] <- "Mesoderm"

##Gong####
unique(raw_list$Gong$anno)
##  [1] "LPM"           "ysMeso"        "EXM"           "Allantois"    
##  [5] "PS"            "Endo"          "DE"            "Mes"          
##  [9] "Cardiomyocyte" "VE"            "Border"        "PM"           
## [13] "STB"           "Amnion"        "CTB"           "Hindgut"      
## [17] "Epidermis"     "EVT"           "EPI"           "AM"           
## [21] "PGC"           "ParE"          "NeuralPlate"   "Blood"        
## [25] "Foregut"
raw_list$Gong$anno_harmo <- raw_list$Gong$anno
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("ysMeso", "EXM", "Allantois")  )] <- "ExE.Mesoderm"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno=="PS"  )] <- "Primitive.streak"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("Endo",    "Blood" )  )] <- "Hemogenic.Endothelial.Progenitors"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c( "DE", "Hindgut","Foregut")  )] <- "Endoderm"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("VE","ParE")  )] <- "ExE.Endoderm"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("CTB","STB","EVT")  )] <- "TE/TrB"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno=="PGC")] <- "PGC"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c(   "NeuralPlate" )  )] <- "Neural.Ectoderm"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("EPI")  )] <- "Epiblast"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("Border","Amnion","Epidermis")  )] <- "Non-Neural.Ectoderm"
raw_list$Gong$anno_harmo[which(raw_list$Gong$anno %in% c("LPM", "Mes", "Cardiomyocyte","PM","AM")  )] <- "Mesoderm"


#add anno_sub
raw_list$Sasaki_paper$anno_sub <- raw_list$Sasaki_paper$anno
raw_list$Nakamura_paper$anno_sub <- raw_list$Nakamura_paper$anno
raw_list$Zhai_2022$anno_sub <- raw_list$Zhai_2022$anno
raw_list$Ma_paper$anno_sub <- raw_list$Ma_paper$anno
raw_list$Yang$anno_sub <- raw_list$Yang$anno
raw_list$Gong$anno_sub <- raw_list$Gong$anno
raw_list$Niu_paper$anno_sub <- raw_list$Niu_paper$anno
raw_list$Zhai_2023_paper$anno_sub <- raw_list$Zhai_2023_paper$anno

#generate label metadata

##Sasaki########
df_Sasaki<- data.frame("dataset"= rep(c("Sasaki")),
                      "stage" = rep(c("E13-E20")),
                      "orig.anno" = raw_list$Sasaki_paper$anno,             
                      "orig.sub_anno" = raw_list$Sasaki_paper$anno_sub,
                      "harmo.anno" = raw_list$Sasaki_paper$anno_harmo )

df_Sasaki<- unique(df_Sasaki)

##Nakamura#########################
df_Nakamura <- data.frame("dataset"= rep(c("Nakamura")),
                      "stage" = rep(c("E6-E17")),
                      "orig.anno" = raw_list$Nakamura_paper$anno,             
                      "orig.sub_anno" = raw_list$Nakamura_paper$anno_sub,
                      "harmo.anno" = raw_list$Nakamura_paper$anno_harmo )

df_Nakamura<- unique(df_Nakamura)


##Zhai_2022#########################
df_Zhai_2022<- data.frame("dataset"= rep(c("Zhai_2022")),
                      "stage" = rep(c("E20-E29")),
                      "orig.anno" = raw_list$Zhai_2022$anno,             
                      "orig.sub_anno" = raw_list$Zhai_2022$anno_sub,
                      "harmo.anno" = raw_list$Zhai_2022$anno_harmo )

df_Zhai_2022<- unique(df_Zhai_2022)

##Ma#########################
df_Ma<- data.frame("dataset"= rep(c("Ma")),
                      "stage" = rep(c("E11-E17")),
                      "orig.anno" = raw_list$Ma_paper$anno,             
                      "orig.sub_anno" = raw_list$Ma_paper$anno_sub,
                      "harmo.anno" = raw_list$Ma_paper$anno_harmo )

df_Ma<- unique(df_Ma)

##Niu#########################
df_Niu<- data.frame("dataset"= rep(c("Niu")),
                      "stage" = rep(c("E9-E20")),
                      "orig.anno" = raw_list$Niu_paper$anno,             
                      "orig.sub_anno" = raw_list$Niu_paper$anno_sub,
                      "harmo.anno" = raw_list$Niu_paper$anno_harmo )

df_Niu<- unique(df_Niu)

##Yang#########################
df_Yang<- data.frame("dataset"= rep(c("Yang")),
                      "stage" = rep(c("E10-E14")),
                      "orig.anno" = raw_list$Yang$anno,             
                      "orig.sub_anno" = raw_list$Yang$anno_sub,
                      "harmo.anno" = raw_list$Yang$anno_harmo )

df_Yang<- unique(df_Yang)

##Gong#########################
df_Gong<- data.frame("dataset"= rep(c("Gong")),
                      "stage" = rep(c("E18-E25")),
                      "orig.anno" = raw_list$Gong$anno,             
                      "orig.sub_anno" = raw_list$Gong$anno_sub,
                      "harmo.anno" = raw_list$Gong$anno_harmo )

df_Gong<- unique(df_Gong)

##Zhai_2023#########################
df_Zhai_2023_paper<- data.frame("dataset"= rep(c("Zhai_2023")),
                      "stage" = rep(c("E16-E25")),
                      "orig.anno" = raw_list$Zhai_2023_paper$anno,             
                      "orig.sub_anno" = raw_list$Zhai_2023_paper$anno_sub,
                      "harmo.anno" = raw_list$Zhai_2023_paper$anno_harmo )

df_Zhai_2023_paper<- unique(df_Zhai_2023_paper)



#############merge#########################
label_all <- rbind(df_Sasaki,df_Nakamura,df_Zhai_2022,df_Ma,df_Niu,df_Yang,df_Gong,df_Zhai_2023_paper )

unique(label_all$harmo.anno)
##  [1] "Primitive.streak"                  "PGC"                              
##  [3] "ExE.Mesoderm"                      "Hypoblast"                        
##  [5] "ICM"                               "TE/TrB"                           
##  [7] "Epiblast"                          "ExE.Endoderm"                     
##  [9] "Mesoderm"                          "Endoderm"                         
## [11] "Non-Neural.Ectoderm"               "Unknown"                          
## [13] "Hemogenic.Endothelial.Progenitors" "Node"                             
## [15] "Neural.Ectoderm"

#check cells

# check cell numbers
cell_counts <- data.frame(Dataset = integer(), Cell_Count = integer())

for (i in seq_along(raw_list)) {
  
  # Get the number of cells in the current Seurat object
  cell_count <- ncol(raw_list[[i]])
  
  # Store the result in the data frame
  cell_counts <- rbind(cell_counts, data.frame(Dataset = names(raw_list[i]), Cell_Count = cell_count))
}

# Print the table of cell counts
print(cell_counts)
##           Dataset Cell_Count
## 1    Sasaki_paper         25
## 2  Nakamura_paper        390
## 3       Zhai_2022      64575
## 4        Ma_paper       1453
## 5       Niu_paper        600
## 6            Yang      15503
## 7            Gong      74466
## 8 Zhai_2023_paper       3680
print(sum(cell_counts$Cell_Count))
## [1] 160692
# Check QC
# Initialize an empty list to store the data frames
df_list <- lapply(seq_along(raw_list), function(i) {
  
  # Create a data frame with min and max values for QC metrics
  df <- data.frame(
    min_nCount_RNA = min(raw_list[[i]]$nCount_RNA),
    min_nFeature_RNA = min(raw_list[[i]]$nFeature_RNA),
    max_percent_mt = max(raw_list[[i]]$percent.mt)
  )
  
  # Add the dataset name as a column
  df$dataset <- names(raw_list)[i]
  
  return(df)
})

# Combine all data frames into one
df <- do.call(rbind, df_list)

# Print the table of cell counts
print(df)
##   min_nCount_RNA min_nFeature_RNA max_percent_mt         dataset
## 1       39449.43             9040      0.3960381    Sasaki_paper
## 2      999727.03             7802      0.3392961  Nakamura_paper
## 3        1002.00              502      8.3025558       Zhai_2022
## 4        3376.00             2018      0.8921330        Ma_paper
## 5       19254.98             2088      0.8626626       Niu_paper
## 6        1002.00              501      1.9547936            Yang
## 7        1979.00              641      6.7808708            Gong
## 8        1052.00              510      1.3338371 Zhai_2023_paper

save Rds

setwd("D:/xueying-work/afterPHD/Liu-lab/project1-embryo benchmarking/2024April/manuscript/R runing")
outputDir = getwd()


write.xlsx(label_all, file = "monkey_pre-label.xlsx")


for (i in 1:length(raw_list)) {

        colnames(raw_list[[i]]@meta.data) <- gsub("anno_sub", "sub_anno", colnames(raw_list[[i]]@meta.data) )
         colnames(raw_list[[i]]@meta.data) <- gsub("anno_harmo", "harmo.anno", colnames(raw_list[[i]]@meta.data) )
        
    }



saveRDS(raw_list, file = "Monkey_all.rds")

remove Sasaki dataset

##removeSasaki dataset due to  only 25 cells left
monkey <- readRDS("monkey_all.rds")
monkey <- monkey[!names(monkey) %in% "Sasaki_paper"]


saveRDS(monkey, file = "Monkey_all.rds")

session info

session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.3 (2024-02-29 ucrt)
##  os       Windows 11 x64 (build 22631)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  Chinese (Simplified)_China.utf8
##  ctype    Chinese (Simplified)_China.utf8
##  tz       Asia/Shanghai
##  date     2024-09-02
##  pandoc   3.1.11 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  ! package          * version    date (UTC) lib source
##    abind              1.4-5      2016-07-21 [1] CRAN (R 4.3.1)
##    AnnotationDbi      1.64.1     2023-11-03 [1] Bioconductor
##    backports          1.5.0      2024-05-23 [1] CRAN (R 4.3.3)
##    beeswarm           0.4.0      2021-06-01 [1] CRAN (R 4.3.1)
##    Biobase            2.62.0     2023-10-24 [1] Bioconductor
##    BiocFileCache      2.11.1     2023-11-09 [1] Github (Bioconductor/BiocFileCache@7b7c4d3)
##    BiocGenerics       0.48.1     2023-11-01 [1] Bioconductor
##    biomaRt          * 2.58.2     2024-01-30 [1] Bioconductor 3.18 (R 4.3.2)
##    Biostrings         2.70.3     2024-03-13 [1] Bioconductor 3.18 (R 4.3.3)
##    bit                4.0.5      2022-11-15 [1] CRAN (R 4.3.2)
##    bit64              4.0.5      2020-08-30 [1] CRAN (R 4.3.2)
##    bitops             1.0-7      2021-04-24 [1] CRAN (R 4.3.1)
##    blob               1.2.4      2023-03-17 [1] CRAN (R 4.3.2)
##    bslib              0.7.0      2024-03-29 [1] CRAN (R 4.3.3)
##    cachem             1.1.0      2024-05-16 [1] CRAN (R 4.3.3)
##    Cairo              1.6-2      2023-11-28 [1] CRAN (R 4.3.2)
##    caTools            1.18.2     2021-03-28 [1] CRAN (R 4.3.2)
##    cellranger         1.1.0      2016-07-27 [1] CRAN (R 4.3.2)
##    checkmate          2.3.1      2023-12-04 [1] CRAN (R 4.3.3)
##    cli                3.6.1      2023-03-23 [1] CRAN (R 4.3.2)
##    cluster            2.1.6      2023-12-01 [2] CRAN (R 4.3.3)
##    clustree         * 0.5.1      2023-11-05 [1] CRAN (R 4.3.3)
##    codetools          0.2-19     2023-02-01 [2] CRAN (R 4.3.3)
##    colorspace         2.1-0      2023-01-23 [1] CRAN (R 4.3.2)
##    cowplot            1.1.3      2024-01-22 [1] CRAN (R 4.3.3)
##    crayon             1.5.3      2024-06-20 [1] CRAN (R 4.3.3)
##    curl               5.2.1      2024-03-01 [1] CRAN (R 4.3.3)
##    data.table         1.15.4     2024-03-30 [1] CRAN (R 4.3.3)
##    DBI                1.2.3      2024-06-02 [1] CRAN (R 4.3.3)
##    dbplyr             2.5.0      2024-03-19 [1] CRAN (R 4.3.3)
##    deldir             2.0-4      2024-02-28 [1] CRAN (R 4.3.2)
##    devtools         * 2.4.5      2022-10-11 [1] CRAN (R 4.3.2)
##    digest             0.6.35     2024-03-11 [1] CRAN (R 4.3.3)
##    dotCall64          1.1-1      2023-11-28 [1] CRAN (R 4.3.3)
##    dplyr            * 1.1.4      2023-11-17 [1] CRAN (R 4.3.3)
##    ellipsis           0.3.2      2021-04-29 [1] CRAN (R 4.3.2)
##    evaluate           0.24.0     2024-06-10 [1] CRAN (R 4.3.3)
##    fansi              1.0.6      2023-12-08 [1] CRAN (R 4.3.3)
##    farver             2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
##    fastDummies        1.7.3      2023-07-06 [1] CRAN (R 4.3.2)
##    fastmap            1.2.0      2024-05-15 [1] CRAN (R 4.3.3)
##    filelock           1.0.3      2023-12-11 [1] CRAN (R 4.3.3)
##    fitdistrplus       1.2-1      2024-07-12 [1] CRAN (R 4.3.3)
##    fs                 1.6.4      2024-04-25 [1] CRAN (R 4.3.3)
##    future             1.33.2     2024-03-26 [1] CRAN (R 4.3.3)
##    future.apply       1.11.2     2024-03-28 [1] CRAN (R 4.3.3)
##    generics           0.1.3      2022-07-05 [1] CRAN (R 4.3.2)
##    GenomeInfoDb       1.38.8     2024-03-15 [1] Bioconductor 3.18 (R 4.3.3)
##    GenomeInfoDbData   1.2.11     2023-11-05 [1] Bioconductor
##    ggbeeswarm         0.7.2      2023-04-29 [1] CRAN (R 4.3.2)
##    ggforce            0.4.2      2024-02-19 [1] CRAN (R 4.3.3)
##    ggplot2          * 3.5.1      2024-04-23 [1] CRAN (R 4.3.3)
##    ggraph           * 2.2.1      2024-03-07 [1] CRAN (R 4.3.3)
##    ggrastr            1.0.2      2023-06-01 [1] CRAN (R 4.3.2)
##    ggrepel            0.9.5      2024-01-10 [1] CRAN (R 4.3.3)
##    ggridges           0.5.6      2024-01-23 [1] CRAN (R 4.3.3)
##    globals            0.16.3     2024-03-08 [1] CRAN (R 4.3.3)
##    glue               1.7.0      2024-01-09 [1] CRAN (R 4.3.3)
##    goftest            1.2-3      2021-10-07 [1] CRAN (R 4.3.1)
##    gplots           * 3.1.3.1    2024-02-02 [1] CRAN (R 4.3.3)
##    graphlayouts       1.1.1      2024-03-09 [1] CRAN (R 4.3.3)
##    gridExtra          2.3        2017-09-09 [1] CRAN (R 4.3.2)
##    gtable             0.3.5      2024-04-22 [1] CRAN (R 4.3.3)
##    gtools             3.9.5      2023-11-20 [1] CRAN (R 4.3.3)
##    highr              0.11       2024-05-26 [1] CRAN (R 4.3.3)
##    hms                1.1.3      2023-03-21 [1] CRAN (R 4.3.2)
##    htmltools          0.5.8.1    2024-04-04 [1] CRAN (R 4.3.3)
##    htmlwidgets        1.6.4      2023-12-06 [1] CRAN (R 4.3.3)
##    httpuv             1.6.15     2024-03-26 [1] CRAN (R 4.3.3)
##    httr               1.4.7      2023-08-15 [1] CRAN (R 4.3.2)
##    ica                1.0-3      2022-07-08 [1] CRAN (R 4.3.1)
##    igraph             2.0.3      2024-03-13 [1] CRAN (R 4.3.3)
##    IRanges            2.36.0     2023-10-24 [1] Bioconductor
##    irlba              2.3.5.1    2022-10-03 [1] CRAN (R 4.3.3)
##    jquerylib          0.1.4      2021-04-26 [1] CRAN (R 4.3.2)
##    jsonlite           1.8.8      2023-12-04 [1] CRAN (R 4.3.3)
##    KEGGREST           1.42.0     2023-10-24 [1] Bioconductor
##    KernSmooth         2.23-22    2023-07-10 [2] CRAN (R 4.3.3)
##    knitr              1.48       2024-07-07 [1] CRAN (R 4.3.3)
##    labeling           0.4.3      2023-08-29 [1] CRAN (R 4.3.1)
##    later              1.3.2      2023-12-06 [1] CRAN (R 4.3.3)
##    lattice            0.22-5     2023-10-24 [2] CRAN (R 4.3.3)
##    lazyeval           0.2.2      2019-03-15 [1] CRAN (R 4.3.2)
##    leiden             0.4.3.1    2023-11-17 [1] CRAN (R 4.3.3)
##    lifecycle          1.0.4      2023-11-07 [1] CRAN (R 4.3.3)
##    listenv            0.9.1      2024-01-29 [1] CRAN (R 4.3.3)
##    lmtest             0.9-40     2022-03-21 [1] CRAN (R 4.3.2)
##    magrittr           2.0.3      2022-03-30 [1] CRAN (R 4.3.2)
##    MASS               7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.3)
##  R Matrix           * 1.6-5      <NA>       [2] <NA>
##    matrixStats        1.3.0      2024-04-11 [1] CRAN (R 4.3.3)
##    memoise            2.0.1      2021-11-26 [1] CRAN (R 4.3.2)
##    mime               0.12       2021-09-28 [1] CRAN (R 4.3.1)
##    miniUI             0.1.1.1    2018-05-18 [1] CRAN (R 4.3.2)
##    munsell            0.5.1      2024-04-01 [1] CRAN (R 4.3.3)
##    nlme               3.1-164    2023-11-27 [2] CRAN (R 4.3.3)
##    parallelly         1.37.1     2024-02-29 [1] CRAN (R 4.3.3)
##    patchwork          1.2.0      2024-01-08 [1] CRAN (R 4.3.3)
##    pbapply            1.7-2      2023-06-27 [1] CRAN (R 4.3.2)
##    pillar             1.9.0      2023-03-22 [1] CRAN (R 4.3.2)
##    pkgbuild           1.4.4      2024-03-17 [1] CRAN (R 4.3.3)
##    pkgconfig          2.0.3      2019-09-22 [1] CRAN (R 4.3.2)
##    pkgload            1.4.0      2024-06-28 [1] CRAN (R 4.3.3)
##    plotly           * 4.10.4     2024-01-13 [1] CRAN (R 4.3.3)
##    plyr               1.8.9      2023-10-02 [1] CRAN (R 4.3.2)
##    png                0.1-8      2022-11-29 [1] CRAN (R 4.3.1)
##    polyclip           1.10-7     2024-07-23 [1] CRAN (R 4.3.3)
##    prettyunits        1.2.0      2023-09-24 [1] CRAN (R 4.3.2)
##    profvis            0.3.8      2023-05-02 [1] CRAN (R 4.3.2)
##    progress           1.2.3      2023-12-06 [1] CRAN (R 4.3.3)
##    progressr          0.14.0     2023-08-10 [1] CRAN (R 4.3.2)
##    promises           1.3.0      2024-04-05 [1] CRAN (R 4.3.3)
##    purrr              1.0.2      2023-08-10 [1] CRAN (R 4.3.2)
##    R6                 2.5.1      2021-08-19 [1] CRAN (R 4.3.2)
##    RANN               2.6.1      2019-01-08 [1] CRAN (R 4.3.2)
##    rappdirs           0.3.3      2021-01-31 [1] CRAN (R 4.3.2)
##    RColorBrewer     * 1.1-3      2022-04-03 [1] CRAN (R 4.3.1)
##    Rcpp               1.0.12     2024-01-09 [1] CRAN (R 4.3.3)
##    RcppAnnoy          0.0.22     2024-01-23 [1] CRAN (R 4.3.3)
##    RcppHNSW           0.6.0      2024-02-04 [1] CRAN (R 4.3.3)
##    RCurl              1.98-1.16  2024-07-11 [1] CRAN (R 4.3.3)
##    readxl           * 1.4.3      2023-07-06 [1] CRAN (R 4.3.2)
##    remotes            2.5.0      2024-03-17 [1] CRAN (R 4.3.3)
##    reshape          * 0.8.9      2022-04-12 [1] CRAN (R 4.3.2)
##    reshape2           1.4.4      2020-04-09 [1] CRAN (R 4.3.2)
##    reticulate         1.38.0     2024-06-19 [1] CRAN (R 4.3.3)
##  D rJava              1.0-11     2024-01-26 [1] CRAN (R 4.3.2)
##    rlang              1.1.2      2023-11-04 [1] CRAN (R 4.3.2)
##    rmarkdown          2.27       2024-05-17 [1] CRAN (R 4.3.3)
##    ROCR               1.0-11     2020-05-02 [1] CRAN (R 4.3.2)
##    RSpectra           0.16-2     2024-07-18 [1] CRAN (R 4.3.3)
##    RSQLite            2.3.7      2024-05-27 [1] CRAN (R 4.3.3)
##    rstudioapi         0.16.0     2024-03-24 [1] CRAN (R 4.3.3)
##    Rtsne              0.17       2023-12-07 [1] CRAN (R 4.3.3)
##    S4Vectors          0.40.2     2023-11-23 [1] Bioconductor
##    sass               0.4.9      2024-03-15 [1] CRAN (R 4.3.3)
##    scales             1.3.0      2023-11-28 [1] CRAN (R 4.3.3)
##    scattermore        1.2        2023-06-12 [1] CRAN (R 4.3.2)
##    sctransform        0.4.1      2023-10-19 [1] CRAN (R 4.3.2)
##    sessioninfo        1.2.2      2021-12-06 [1] CRAN (R 4.3.2)
##    Seurat           * 5.1.0      2024-05-10 [1] CRAN (R 4.3.3)
##    SeuratObject     * 5.0.2      2024-05-08 [1] CRAN (R 4.3.3)
##    shiny              1.8.1.1    2024-04-02 [1] CRAN (R 4.3.3)
##    sp               * 2.1-4      2024-04-30 [1] CRAN (R 4.3.3)
##    spam               2.10-0     2023-10-23 [1] CRAN (R 4.3.2)
##    spatstat.data      3.1-2      2024-06-21 [1] CRAN (R 4.3.3)
##    spatstat.explore   3.3-1      2024-07-15 [1] CRAN (R 4.3.3)
##    spatstat.geom      3.3-2      2024-07-15 [1] CRAN (R 4.3.3)
##    spatstat.random    3.3-1      2024-07-15 [1] CRAN (R 4.3.3)
##    spatstat.sparse    3.1-0      2024-06-21 [1] CRAN (R 4.3.3)
##    spatstat.univar    3.0-0      2024-06-28 [1] CRAN (R 4.3.3)
##    spatstat.utils     3.0-5      2024-06-17 [1] CRAN (R 4.3.3)
##    stringi            1.8.4      2024-05-06 [1] CRAN (R 4.3.3)
##    stringr          * 1.5.1      2023-11-14 [1] CRAN (R 4.3.3)
##    survival           3.5-8      2024-02-14 [2] CRAN (R 4.3.3)
##    tensor             1.5        2012-05-05 [1] CRAN (R 4.3.1)
##    tibble             3.2.1      2023-03-20 [1] CRAN (R 4.3.2)
##    tidygraph          1.3.1      2024-01-30 [1] CRAN (R 4.3.3)
##    tidyr              1.3.1      2024-01-24 [1] CRAN (R 4.3.3)
##    tidyselect         1.2.1      2024-03-11 [1] CRAN (R 4.3.3)
##    tweenr             2.0.3      2024-02-26 [1] CRAN (R 4.3.3)
##    urlchecker         1.0.1      2021-11-30 [1] CRAN (R 4.3.2)
##    usethis          * 2.2.3      2024-02-19 [1] CRAN (R 4.3.3)
##    utf8               1.2.4      2023-10-22 [1] CRAN (R 4.3.2)
##    uwot               0.2.2      2024-04-21 [1] CRAN (R 4.3.3)
##    vctrs              0.6.5      2023-12-01 [1] CRAN (R 4.3.3)
##    vipor              0.4.7      2023-12-18 [1] CRAN (R 4.3.3)
##    viridis            0.6.5      2024-01-29 [1] CRAN (R 4.3.3)
##    viridisLite        0.4.2      2023-05-02 [1] CRAN (R 4.3.2)
##    withr              3.0.0      2024-01-16 [1] CRAN (R 4.3.3)
##    xfun               0.44       2024-05-15 [1] CRAN (R 4.3.3)
##    xlsx             * 0.6.5      2020-11-10 [1] CRAN (R 4.3.3)
##    xlsxjars           0.6.1      2014-08-22 [1] CRAN (R 4.3.2)
##    XML                3.99-0.17  2024-06-25 [1] CRAN (R 4.3.3)
##    xml2               1.3.6      2023-12-04 [1] CRAN (R 4.3.3)
##    xtable             1.8-4      2019-04-21 [1] CRAN (R 4.3.2)
##    XVector            0.42.0     2023-10-24 [1] Bioconductor
##    yaml               2.3.8      2023-12-11 [1] CRAN (R 4.3.2)
##    zlibbioc           1.48.2     2024-03-13 [1] Bioconductor 3.18 (R 4.3.3)
##    zoo                1.8-12     2023-04-13 [1] CRAN (R 4.3.2)
## 
##  [1] C:/Users/fxyin/AppData/Local/R/win-library/4.3
##  [2] C:/Program Files/R/R-4.3.3/library
## 
##  D ── DLL MD5 mismatch, broken installation.
##  R ── Package was removed from disk.
## 
## ──────────────────────────────────────────────────────────────────────────────